From 4c07e11f26c8d8f654f8f10097381d9e57c1497e Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 4 Apr 2024 22:58:48 +0000
Subject: [PATCH] Deployed 1a964f9 with MkDocs version: 1.5.3

---
 .nojekyll                                     |    0
 404.html                                      |  624 ++
 api/AnnotationMatrix/index.html               | 3086 +++++
 api/GWADataLoader/index.html                  | 7347 ++++++++++++
 api/GenotypeMatrix/index.html                 | 7845 +++++++++++++
 api/LDMatrix/index.html                       | 9918 +++++++++++++++++
 api/SampleTable/index.html                    | 3727 +++++++
 api/SumstatsTable/index.html                  | 5647 ++++++++++
 api/overview/index.html                       |  896 ++
 api/parsers/annotation_parsers/index.html     | 1499 +++
 api/parsers/misc_parsers/index.html           | 1128 ++
 api/parsers/plink_parsers/index.html          | 1018 ++
 api/parsers/sumstats_parsers/index.html       | 3446 ++++++
 api/plot/gwa/index.html                       | 1211 ++
 api/plot/ld/index.html                        |  889 ++
 .../AnnotatedPhenotypeSimulator/index.html    | 1621 +++
 .../MultiCohortPhenotypeSimulator/index.html  | 1254 +++
 api/simulation/PhenotypeSimulator/index.html  | 3184 ++++++
 api/stats/gwa/utils/index.html                | 1742 +++
 api/stats/h2/ldsc/index.html                  | 1138 ++
 api/stats/ld/estimator/index.html             | 4125 +++++++
 api/stats/ld/utils/index.html                 | 2518 +++++
 api/stats/score/utils/index.html              | 1041 ++
 api/stats/transforms/genotype/index.html      |  835 ++
 api/stats/transforms/phenotype/index.html     | 1418 +++
 api/stats/variant/utils/index.html            | 1078 ++
 api/utils/compute_utils/index.html            | 1053 ++
 api/utils/data_utils/index.html               |  801 ++
 api/utils/executors/index.html                | 1436 +++
 api/utils/model_utils/index.html              | 2274 ++++
 api/utils/system_utils/index.html             | 1453 +++
 assets/_mkdocstrings.css                      |  114 +
 assets/images/favicon.png                     |  Bin 0 -> 1870 bytes
 assets/javascripts/bundle.1e8ae164.min.js     |   29 +
 assets/javascripts/bundle.1e8ae164.min.js.map |    7 +
 assets/javascripts/lunr/min/lunr.ar.min.js    |    1 +
 assets/javascripts/lunr/min/lunr.da.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.de.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.du.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.el.min.js    |    1 +
 assets/javascripts/lunr/min/lunr.es.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.fi.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.fr.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.he.min.js    |    1 +
 assets/javascripts/lunr/min/lunr.hi.min.js    |    1 +
 assets/javascripts/lunr/min/lunr.hu.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.hy.min.js    |    1 +
 assets/javascripts/lunr/min/lunr.it.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.ja.min.js    |    1 +
 assets/javascripts/lunr/min/lunr.jp.min.js    |    1 +
 assets/javascripts/lunr/min/lunr.kn.min.js    |    1 +
 assets/javascripts/lunr/min/lunr.ko.min.js    |    1 +
 assets/javascripts/lunr/min/lunr.multi.min.js |    1 +
 assets/javascripts/lunr/min/lunr.nl.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.no.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.pt.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.ro.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.ru.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.sa.min.js    |    1 +
 .../lunr/min/lunr.stemmer.support.min.js      |    1 +
 assets/javascripts/lunr/min/lunr.sv.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.ta.min.js    |    1 +
 assets/javascripts/lunr/min/lunr.te.min.js    |    1 +
 assets/javascripts/lunr/min/lunr.th.min.js    |    1 +
 assets/javascripts/lunr/min/lunr.tr.min.js    |   18 +
 assets/javascripts/lunr/min/lunr.vi.min.js    |    1 +
 assets/javascripts/lunr/min/lunr.zh.min.js    |    1 +
 assets/javascripts/lunr/tinyseg.js            |  206 +
 assets/javascripts/lunr/wordcut.js            | 6708 +++++++++++
 .../workers/search.b8dbb3d2.min.js            |   42 +
 .../workers/search.b8dbb3d2.min.js.map        |    7 +
 assets/stylesheets/main.bcfcd587.min.css      |    1 +
 assets/stylesheets/main.bcfcd587.min.css.map  |    1 +
 assets/stylesheets/palette.06af60db.min.css   |    1 +
 .../stylesheets/palette.06af60db.min.css.map  |    1 +
 citation/index.html                           |  784 ++
 commandline/magenpy_ld/index.html             |  837 ++
 commandline/magenpy_simulate/index.html       |  823 ++
 commandline/overview/index.html               |  723 ++
 faq/index.html                                |  707 ++
 features/index.html                           | 1242 +++
 getting_started/index.html                    |  812 ++
 index.html                                    |  809 ++
 installation/index.html                       |  920 ++
 objects.inv                                   |  Bin 0 -> 3746 bytes
 search/search_index.json                      |    1 +
 sitemap.xml                                   |    3 +
 sitemap.xml.gz                                |  Bin 0 -> 127 bytes
 tutorials/overview/index.html                 |  707 ++
 89 files changed, 91024 insertions(+)
 create mode 100644 .nojekyll
 create mode 100644 404.html
 create mode 100644 api/AnnotationMatrix/index.html
 create mode 100644 api/GWADataLoader/index.html
 create mode 100644 api/GenotypeMatrix/index.html
 create mode 100644 api/LDMatrix/index.html
 create mode 100644 api/SampleTable/index.html
 create mode 100644 api/SumstatsTable/index.html
 create mode 100644 api/overview/index.html
 create mode 100644 api/parsers/annotation_parsers/index.html
 create mode 100644 api/parsers/misc_parsers/index.html
 create mode 100644 api/parsers/plink_parsers/index.html
 create mode 100644 api/parsers/sumstats_parsers/index.html
 create mode 100644 api/plot/gwa/index.html
 create mode 100644 api/plot/ld/index.html
 create mode 100644 api/simulation/AnnotatedPhenotypeSimulator/index.html
 create mode 100644 api/simulation/MultiCohortPhenotypeSimulator/index.html
 create mode 100644 api/simulation/PhenotypeSimulator/index.html
 create mode 100644 api/stats/gwa/utils/index.html
 create mode 100644 api/stats/h2/ldsc/index.html
 create mode 100644 api/stats/ld/estimator/index.html
 create mode 100644 api/stats/ld/utils/index.html
 create mode 100644 api/stats/score/utils/index.html
 create mode 100644 api/stats/transforms/genotype/index.html
 create mode 100644 api/stats/transforms/phenotype/index.html
 create mode 100644 api/stats/variant/utils/index.html
 create mode 100644 api/utils/compute_utils/index.html
 create mode 100644 api/utils/data_utils/index.html
 create mode 100644 api/utils/executors/index.html
 create mode 100644 api/utils/model_utils/index.html
 create mode 100644 api/utils/system_utils/index.html
 create mode 100644 assets/_mkdocstrings.css
 create mode 100644 assets/images/favicon.png
 create mode 100644 assets/javascripts/bundle.1e8ae164.min.js
 create mode 100644 assets/javascripts/bundle.1e8ae164.min.js.map
 create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.el.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.he.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.hy.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.kn.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.sa.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.te.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.tr.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.vi.min.js
 create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js
 create mode 100644 assets/javascripts/lunr/tinyseg.js
 create mode 100644 assets/javascripts/lunr/wordcut.js
 create mode 100644 assets/javascripts/workers/search.b8dbb3d2.min.js
 create mode 100644 assets/javascripts/workers/search.b8dbb3d2.min.js.map
 create mode 100644 assets/stylesheets/main.bcfcd587.min.css
 create mode 100644 assets/stylesheets/main.bcfcd587.min.css.map
 create mode 100644 assets/stylesheets/palette.06af60db.min.css
 create mode 100644 assets/stylesheets/palette.06af60db.min.css.map
 create mode 100644 citation/index.html
 create mode 100644 commandline/magenpy_ld/index.html
 create mode 100644 commandline/magenpy_simulate/index.html
 create mode 100644 commandline/overview/index.html
 create mode 100644 faq/index.html
 create mode 100644 features/index.html
 create mode 100644 getting_started/index.html
 create mode 100644 index.html
 create mode 100644 installation/index.html
 create mode 100644 objects.inv
 create mode 100644 search/search_index.json
 create mode 100644 sitemap.xml
 create mode 100644 sitemap.xml.gz
 create mode 100644 tutorials/overview/index.html
diff --git a/.nojekyll b/.nojekyll
new file mode 100644
index 0000000..e69de29
diff --git a/404.html b/404.html
new file mode 100644
index 0000000..51e86ea
--- /dev/null
+++ b/404.html
@@ -0,0 +1,624 @@

404 - Not found

\ No newline at end of file
diff --git a/api/AnnotationMatrix/index.html b/api/AnnotationMatrix/index.html
new file mode 100644
index 0000000..2ff69ca
--- /dev/null
+++ b/api/AnnotationMatrix/index.html
@@ -0,0 +1,3086 @@

AnnotationMatrix

Bases: object

A wrapper class for handling annotation matrices, which are essentially tables of
features for each variant in the genome. These features include information such as
whether the variant is in coding regions, enhancers, etc. It can also include continuous
features derived from experimental assays or other sources.

The purpose of this class is to present a unified and consistent interface for handling
annotations across different tools and applications. It should be able to read and write
annotation matrices in different formats, filter annotations, and perform basic operations
on the annotation matrix. It should also allow users to define new custom annotations
that can be used for downstream statistical genetics applications.

Attributes:

    table          A pandas dataframe containing the annotation information.
    _annotations   A list or array of column names to consider as annotations. If not
                   provided, they will be inferred heuristically, though we recommend
                   that the user specify this information.
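
To make the interface concrete, here is a minimal usage sketch that constructs an
annotation matrix directly from a pandas dataframe. The chromosome, variant IDs,
positions, and annotation values below are hypothetical and purely for illustration:

    import pandas as pd
    from magenpy.AnnotationMatrix import AnnotationMatrix

    # A toy annotation table with the standard CHR/SNP/POS columns, plus
    # one binary and one continuous annotation:
    annot_df = pd.DataFrame({
        'CHR': [22, 22, 22],
        'SNP': ['rs100', 'rs200', 'rs300'],
        'POS': [100500, 200600, 300700],
        'Coding': [1, 0, 1],
        'Conservation': [0.32, 0.87, 0.15]
    })

    annot = AnnotationMatrix(annotation_table=annot_df,
                             annotations=['Coding', 'Conservation'])

    print(annot.shape)  # (3, 2): 3 variants x 2 annotations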

Source code in magenpy/AnnotationMatrix.py (lines 4-312):
class AnnotationMatrix(object):
    """
    A wrapper class for handling annotation matrices, which are essentially tables of
    features for each variant in the genome. These features include information such as
    whether the variant is in coding regions, enhancers, etc. It can also include continuous
    features derived from experimental assays or other sources.

    The purpose of this class is to present a unified and consistent interface for handling
    annotations across different tools and applications. It should be able to read and write
    annotation matrices in different formats, filter annotations, and perform basic operations
    on the annotation matrix. It should also allow users to define new custom annotations
    that can be used for downstream statistical genetics applications.

    :ivar table: A pandas dataframe containing the annotation information.
    :ivar _annotations: A list or array of column names to consider as annotations. If not provided,
    will be inferred heuristically, though we recommend that the user specify this information.
    """

    def __init__(self, annotation_table=None, annotations=None):
        """
        Initialize an AnnotationMatrix object.

        :param annotation_table: A pandas dataframe containing the annotation information.
        :param annotations: A list or array of columns to consider as annotations. If not provided, will be
        inferred heuristically, though we recommend that the user specify this information.
        """

        self.table = annotation_table
        self._annotations = annotations

        if self.table is not None:
            if self._annotations is None:
                self._annotations = [ann for ann in self.table.columns if ann not in ('CHR', 'SNP', 'POS')]
                if len(self._annotations) < 1:
                    self._annotations = None

    @classmethod
    def from_file(cls, annot_file, annot_format='magenpy', annot_parser=None,
                  **parse_kwargs):
        """
        Initialize an AnnotationMatrix object from a file.

        :param annot_file: The path to the annotation file.
        :param annot_format: The format of the annotation file. For now, we mainly support
        annotation files in the `magenpy` and `ldsc` formats.
        :param annot_parser: An `AnnotationMatrixParser` derived object, which can be tailored to
        specific annotation formats that the user has.
        :param parse_kwargs: arguments for the pandas `read_csv` function, such as the delimiter.

        :return: An instance of the `AnnotationMatrix` class.
        """

        from .parsers.annotation_parsers import AnnotationMatrixParser, LDSCAnnotationMatrixParser

        if annot_parser is None:
            if annot_format == 'magenpy':
                annot_parser = AnnotationMatrixParser(None, **parse_kwargs)
            elif annot_format == 'ldsc':
                annot_parser = LDSCAnnotationMatrixParser(None, **parse_kwargs)
            else:
                raise KeyError(f"Annotation matrix format {annot_format} not recognized!")

        annot_table, annotations = annot_parser.parse(annot_file)

        annot_mat = cls(annotation_table=annot_table, annotations=annotations)

        return annot_mat

    @property
    def shape(self):
        """
        :return: The dimensions of the annotation matrix (number of variants x number of annotations).
        """
        return self.n_snps, self.n_annotations

    @property
    def n_snps(self):
        """
        :return: The number of variants in the annotation matrix.
        """
        return len(self.table)

    @property
    def chromosome(self):
        """
        A convenience method to get the chromosome if there is only one chromosome in the annotation matrix.

        :return: The chromosome number if there is only one chromosome in the annotation matrix. Otherwise, None.
        """
        chrom = self.chromosomes
        if chrom is not None:
            if len(chrom) == 1:
                return chrom[0]

    @property
    def chromosomes(self):
        """
        :return: The list of unique chromosomes in the annotation matrix.
        """
        if 'CHR' in self.table.columns:
            return self.table['CHR'].unique()

    @property
    def snps(self):
        """
        :return: The list of SNP rsIDs in the annotation matrix.
        """
        return self.table['SNP'].values

    @property
    def n_annotations(self):
        """
        :return: The number of annotations in the annotation matrix.
        """
        if self.annotations is None:
            return 0
        else:
            return len(self.annotations)

    @property
    def binary_annotations(self):
        """
        :return: A list of binary (0/1) annotations in the annotation matrix.
        """
        assert self.annotations is not None
        return np.array([c for c in self.annotations
                         if len(self.table[c].unique()) == 2])

    @property
    def annotations(self):
        """
        :return: The list of annotation names or IDs in the annotation matrix.
        """
        return self._annotations

    def values(self, add_intercept=False):
        """
        :param add_intercept: Adds a base annotation corresponding to the intercept.

        :return: The annotation matrix as a numpy matrix.
        :raises KeyError: If no annotations are defined in the table.
        """

        if self.annotations is None:
            raise KeyError("No annotations are defined in this table!")
        annot_mat = self.table[self.annotations].values
        if add_intercept:
            return np.hstack([np.ones((annot_mat.shape[0], 1)), annot_mat])
        else:
            return annot_mat

    def filter_snps(self, extract_snps=None, extract_file=None):
        """
        Filter variants from the annotation matrix. User must specify
        either a list of variants to extract or the path to a file
        with the list of variants to extract.

        :param extract_snps: A list or array of SNP IDs to keep in the annotation matrix.
        :param extract_file: The path to a file with the list of variants to extract.
        """

        assert extract_snps is not None or extract_file is not None

        if extract_file is not None:
            from .parsers.misc_parsers import read_snp_filter_file
            extract_snps = read_snp_filter_file(extract_file)

        from .utils.compute_utils import intersect_arrays

        arr_idx = intersect_arrays(self.snps, extract_snps, return_index=True)

        self.table = self.table.iloc[arr_idx, :].reset_index()

    def filter_annotations(self, keep_annotations):
        """
        Filter the list of annotations in the matrix.

        :param keep_annotations: A list or array of annotations to keep.
        """

        if self.annotations is None:
            return

        self._annotations = [annot for annot in self._annotations if annot in keep_annotations]
        self.table = self.table[['CHR', 'SNP', 'POS'] + self._annotations]

    def add_annotation(self, annot_vec, annotation_name):
        """
        Add an annotation vector or list to the AnnotationMatrix object.

        :param annot_vec: A vector/list/Series containing the annotation information for each SNP in the
        AnnotationMatrix. For now, it's the responsibility of the user to make sure that the annotation
        list or vector is sorted properly.
        :param annotation_name: The name of the annotation to create. Make sure the name is not already
        in the matrix!
        """

        if self.annotations is not None:
            assert annotation_name not in self.annotations
        assert len(annot_vec) == self.n_snps

        self.table[annotation_name] = annot_vec

        if self.annotations is None:
            self._annotations = [annotation_name]
        else:
            self._annotations = list(self._annotations) + [annotation_name]

    def add_annotation_from_bed(self, bed_file, annotation_name):
        """
        Add an annotation to the AnnotationMatrix from a BED file that lists
        the range of coordinates associated with that annotation (e.g. coding regions, enhancers, etc.).
        The BED file has to adhere to the format specified by
        https://uswest.ensembl.org/info/website/upload/bed.html
        with the first three columns being:

        CHR StartCoordinate EndCoordinate ...

        !!! note
            This implementation is quite slow at the moment. May need to find more efficient
            ways to do the merge over the list of ranges.

        :param bed_file: The path to the BED file containing the annotation coordinates.
        :param annotation_name: The name of the annotation to create. Make sure the name is not already
        in the matrix!

        :raises AssertionError: If the annotation name is already in the matrix.
        """

        from .parsers.annotation_parsers import parse_annotation_bed_file

        if self.annotations is not None:
            assert annotation_name not in self.annotations

        bed_df = parse_annotation_bed_file(bed_file)
        # Group the BED annotation file by chromosome:
        range_groups = bed_df.groupby('CHR').groups

        def annotation_overlap(row):
            """
            This function takes a row from the annotation matrix table
            and returns True if and only if the BP position for the
            SNP is within the range specified by the annotation BED file.
            """
            try:
                chr_range = bed_df.iloc[range_groups[row['CHR']], :]
            except KeyError:
                return False

            check = (chr_range.Start <= row['POS']) & (chr_range.End >= row['POS'])
            return int(np.any(check))

        self.table[annotation_name] = self.table.apply(annotation_overlap, axis=1)

        if self.annotations is None:
            self._annotations = [annotation_name]
        else:
            self._annotations = list(self._annotations) + [annotation_name]

    def get_binary_annotation_index(self, bin_annot):
        """
        :param bin_annot: The name of the binary annotation for which to fetch the relevant variants.
        :return: The indices of all variants that belong to binary annotation `bin_annot`.
        """
        assert bin_annot in self.binary_annotations
        return np.where(self.table[bin_annot] == 1)[0]

    def split_by_chromosome(self):
        """
        Split the annotation matrix by chromosome.

        :return: A dictionary of `AnnotationMatrix` objects, where the keys are the chromosome numbers.
        """

        if 'CHR' in self.table.columns:
            chrom_tables = self.table.groupby('CHR')
            return {
                c: AnnotationMatrix(annotation_table=chrom_tables.get_group(c),
                                    annotations=self.annotations)
                for c in chrom_tables.groups
            }
        else:
            raise KeyError("Chromosome information is not available in the annotation table!")

    def to_file(self, output_path, col_subset=None, compress=True, **to_csv_kwargs):
        """
        A convenience method to write the annotation matrix to a file.

        :param output_path: The path and prefix to the file where to write the annotation matrix.
        :param col_subset: A subset of the columns to write to file.
        :param compress: Whether to compress the output file (default: True).
        :param to_csv_kwargs: Keyword arguments to the pandas csv writer.
        """

        if 'sep' not in to_csv_kwargs and 'delimiter' not in to_csv_kwargs:
            to_csv_kwargs['sep'] = '\t'

        if 'index' not in to_csv_kwargs:
            to_csv_kwargs['index'] = False

        if col_subset is not None:
            table = self.table[col_subset]
        else:
            table = self.table

        file_name = output_path + '.annot'
        if compress:
            file_name += '.gz'

        table.to_csv(file_name, **to_csv_kwargs)

annotations (property)
    Returns: The list of annotation names or IDs in the annotation matrix.

binary_annotations (property)
    Returns: A list of binary (0/1) annotations in the annotation matrix.

chromosome (property)
    A convenience method to get the chromosome if there is only one chromosome
    in the annotation matrix.
    Returns: The chromosome number if there is only one chromosome in the
    annotation matrix. Otherwise, None.

chromosomes (property)
    Returns: The list of unique chromosomes in the annotation matrix.

n_annotations (property)
    Returns: The number of annotations in the annotation matrix.

n_snps (property)
    Returns: The number of variants in the annotation matrix.

shape (property)
    Returns: The dimensions of the annotation matrix
    (number of variants x number of annotations).

snps (property)
    Returns: The list of SNP rsIDs in the annotation matrix.

__init__(annotation_table=None, annotations=None)

Initialize an AnnotationMatrix object.

Parameters:

    annotation_table   A pandas dataframe containing the annotation information.
                       Default: None.
    annotations        A list or array of columns to consider as annotations. If not
                       provided, they will be inferred heuristically, though we
                       recommend that the user specify this information. Default: None.

Source code in magenpy/AnnotationMatrix.py, lines 22-38 (reproduced in the class listing above).

add_annotation(annot_vec, annotation_name)

Add an annotation vector or list to the AnnotationMatrix object.

Parameters:

    annot_vec         A vector/list/Series containing the annotation information for
                      each SNP in the AnnotationMatrix. For now, it's the responsibility
                      of the user to make sure that the annotation list or vector is
                      sorted properly. Required.
    annotation_name   The name of the annotation to create. Make sure the name is not
                      already in the matrix! Required.

Source code in magenpy/AnnotationMatrix.py (reproduced in the class listing above).
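
A minimal usage sketch, continuing the construction example above. The annotation
vector here is randomly generated purely for illustration; in practice it must be
aligned with the variant order in the matrix:

    import numpy as np

    # Flag roughly 10% of variants with a hypothetical custom binary annotation:
    my_annot = np.random.binomial(1, 0.1, size=annot.n_snps)
    annot.add_annotation(my_annot, annotation_name='MyCustomAnnot')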

add_annotation_from_bed(bed_file, annotation_name)

Add an annotation to the AnnotationMatrix from a BED file that lists the range of
coordinates associated with that annotation (e.g. coding regions, enhancers, etc.).
The BED file has to adhere to the format specified by
https://uswest.ensembl.org/info/website/upload/bed.html
with the first three columns being:

CHR StartCoordinate EndCoordinate ...

Note: This implementation is quite slow at the moment. It may need more efficient
ways to do the merge over the list of ranges.

Parameters:

    bed_file          The path to the BED file containing the annotation coordinates.
                      Required.
    annotation_name   The name of the annotation to create. Make sure the name is not
                      already in the matrix! Required.

Raises:

    AssertionError   If the annotation name is already in the matrix.

Source code in magenpy/AnnotationMatrix.py (reproduced in the class listing above).
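
A usage sketch, assuming a hypothetical BED file of enhancer regions on disk:

    # Mark variants whose positions fall within regions listed in the BED file:
    annot.add_annotation_from_bed('data/enhancers.bed', annotation_name='Enhancer')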

filter_annotations(keep_annotations)

Filter the list of annotations in the matrix.

Parameters:

    keep_annotations   A list or array of annotations to keep. Required.

Source code in magenpy/AnnotationMatrix.py (reproduced in the class listing above).
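
For example, to retain only the hypothetical 'Coding' annotation from the earlier
sketch:

    annot.filter_annotations(keep_annotations=['Coding'])
    print(annot.annotations)  # ['Coding']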

filter_snps(extract_snps=None, extract_file=None)

Filter variants from the annotation matrix. The user must specify either a list of
variants to extract or the path to a file with the list of variants to extract.

Parameters:

    extract_snps   A list or array of SNP IDs to keep in the annotation matrix.
                   Default: None.
    extract_file   The path to a file with the list of variants to extract.
                   Default: None.

Source code in magenpy/AnnotationMatrix.py (reproduced in the class listing above).
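
A usage sketch with hypothetical variant IDs; alternatively, pass a plink-style file
with one variant ID per line via extract_file:

    annot.filter_snps(extract_snps=['rs100', 'rs300'])
    # or: annot.filter_snps(extract_file='data/keep_variants.txt')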

from_file(annot_file, annot_format='magenpy', annot_parser=None, **parse_kwargs)   (classmethod)

Initialize an AnnotationMatrix object from a file.

Parameters:

    annot_file     The path to the annotation file. Required.
    annot_format   The format of the annotation file. For now, we mainly support
                   annotation files in the magenpy and ldsc formats.
                   Default: 'magenpy'.
    annot_parser   An AnnotationMatrixParser derived object, which can be tailored
                   to specific annotation formats that the user has. Default: None.
    parse_kwargs   Arguments for the pandas read_csv function, such as the delimiter.
                   Default: {}.

Returns:

    An instance of the AnnotationMatrix class.

Source code in magenpy/AnnotationMatrix.py, lines 40-70 (reproduced in the class listing above).
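
A usage sketch, assuming a hypothetical LDSC-formatted annotation file on disk:

    from magenpy.AnnotationMatrix import AnnotationMatrix

    annot = AnnotationMatrix.from_file('data/baseline.22.annot.gz',
                                       annot_format='ldsc')
    print(annot.n_annotations, annot.n_snps)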

get_binary_annotation_index(bin_annot)

Parameters:

    bin_annot   The name of the binary annotation for which to fetch the relevant
                variants. Required.

Returns:

    The indices of all variants that belong to binary annotation bin_annot.

Source code in magenpy/AnnotationMatrix.py (reproduced in the class listing above).
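
For example, using the hypothetical 'Coding' annotation from the earlier sketch to
subset other per-variant data structures:

    coding_idx = annot.get_binary_annotation_index('Coding')
    coding_snps = annot.snps[coding_idx]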

split_by_chromosome()

Split the annotation matrix by chromosome.

Returns:

    A dictionary of AnnotationMatrix objects, where the keys are the chromosome numbers.

Source code in magenpy/AnnotationMatrix.py (reproduced in the class listing above).
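
A usage sketch for a genome-wide annotation matrix:

    chrom_annots = annot.split_by_chromosome()
    for chrom, chrom_annot in chrom_annots.items():
        print(chrom, chrom_annot.n_snps)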

to_file(output_path, col_subset=None, compress=True, **to_csv_kwargs)

A convenience method to write the annotation matrix to a file.

Parameters:

    output_path     The path and prefix to the file where to write the annotation
                    matrix. Required.
    col_subset      A subset of the columns to write to file. Default: None.
    compress        Whether to compress the output file. Default: True.
    to_csv_kwargs   Keyword arguments to the pandas csv writer. Default: {}.

Source code in magenpy/AnnotationMatrix.py (reproduced in the class listing above).
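
A usage sketch with a hypothetical output prefix; with compress=True this writes a
tab-delimited file named 'output/my_annotations.annot.gz':

    annot.to_file('output/my_annotations', compress=True)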

values(add_intercept=False)

Parameters:

    add_intercept   Adds a base annotation corresponding to the intercept.
                    Default: False.

Returns:

    The annotation matrix as a numpy matrix.

Raises:

    KeyError   If no annotations are defined in the table.

Source code in magenpy/AnnotationMatrix.py (reproduced in the class listing above).
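
A usage sketch, e.g. to prepare a design matrix for regression-style analyses:

    X = annot.values(add_intercept=True)
    print(X.shape)  # (n_snps, n_annotations + 1)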
\ No newline at end of file
diff --git a/api/GWADataLoader/index.html b/api/GWADataLoader/index.html
new file mode 100644
index 0000000..bb678b3
--- /dev/null
+++ b/api/GWADataLoader/index.html
@@ -0,0 +1,7347 @@

GWADataLoader

Bases: object

A class to load and manage multiple data sources for genetic association studies.
This class is designed to handle genotype matrices, summary statistics, LD matrices,
and annotation matrices. It also provides functionalities to filter samples and/or SNPs,
harmonize data sources, and compute LD matrices. This is all done in order to facilitate
downstream statistical genetics analyses that require multiple data sources to be aligned
and harmonized. The use cases include:

  • Summary statistics-based PRS computation.
  • Summary statistics-based heritability estimation.
  • Complex trait simulation.
  • Performing genome-wide association tests.

Attributes:

    genotype               (Dict[int, GenotypeMatrix] or None) A dictionary of
                           GenotypeMatrix objects, where the key is the chromosome number.
    sample_table           (SampleTable or None) A SampleTable object containing the
                           sample information.
    phenotype_likelihood   (str) The likelihood of the phenotype (e.g. gaussian, binomial).
    ld                     (Dict[int, LDMatrix] or None) A dictionary of LDMatrix objects,
                           where the key is the chromosome number.
    sumstats_table         (Dict[int, SumstatsTable] or None) A dictionary of SumstatsTable
                           objects, where the key is the chromosome number.
    annotation             (Dict[int, AnnotationMatrix] or None) A dictionary of
                           AnnotationMatrix objects, where the key is the chromosome number.
    backend                The backend software used for the computation. Currently,
                           supports xarray and plink.
    temp_dir               The temporary directory where we store intermediate files
                           (if necessary).
    output_dir             The output directory where we store the results of the
                           computation.
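
To make the interface concrete, here is a minimal usage sketch. All file paths below
are hypothetical placeholders; the wildcard in bed_files reads one BED file per
chromosome, and the summary statistics file is assumed to be in plink 1.9 format:

    from magenpy.GWADataLoader import GWADataLoader

    gdl = GWADataLoader(bed_files='data/genotypes_chr*.bed',
                        sumstats_files='data/height_sumstats.txt',
                        sumstats_format='plink1.9',
                        backend='xarray',
                        threads=4)

    # After the data sources are read and harmonized, inspect their dimensions:
    print(gdl.n_snps, gdl.sample_size)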


Source code in magenpy/GWADataLoader.py (lines 21-1051; the listing below is truncated):
class GWADataLoader(object):
    """
    A class to load and manage multiple data sources for genetic association studies.
    This class is designed to handle genotype matrices, summary statistics, LD matrices,
    and annotation matrices. It also provides functionalities to filter samples and/or SNPs,
    harmonize data sources, and compute LD matrices. This is all done in order to facilitate
    downstream statistical genetics analyses that require multiple data sources to be aligned
    and harmonized. The use cases include:

    * Summary statistics-based PRS computation.
    * Summary statistics-based heritability estimation.
    * Complex trait simulation.
    * Performing genome-wide association tests.

    :ivar genotype: A dictionary of `GenotypeMatrix` objects, where the key is the chromosome number.
    :ivar sample_table: A `SampleTable` object containing the sample information.
    :ivar phenotype_likelihood: The likelihood of the phenotype (e.g. `gaussian`, `binomial`).
    :ivar ld: A dictionary of `LDMatrix` objects, where the key is the chromosome number.
    :ivar sumstats_table: A dictionary of `SumstatsTable` objects, where the key is the chromosome number.
    :ivar annotation: A dictionary of `AnnotationMatrix` objects, where the key is the chromosome number.
    :ivar backend: The backend software used for the computation. Currently, supports `xarray` and `plink`.
    :ivar temp_dir: The temporary directory where we store intermediate files (if necessary).
    :ivar output_dir: The output directory where we store the results of the computation.
    """

    def __init__(self,
                 bed_files=None,
                 phenotype_file=None,
                 covariates_file=None,
                 keep_samples=None,
                 keep_file=None,
                 extract_snps=None,
                 extract_file=None,
                 min_maf=None,
                 min_mac=None,
                 drop_duplicated=True,
                 phenotype_likelihood='gaussian',
                 sumstats_files=None,
                 sumstats_format='magenpy',
                 ld_store_files=None,
                 annotation_files=None,
                 annotation_format='magenpy',
                 backend='xarray',
                 temp_dir='temp',
                 output_dir='output',
                 verbose=True,
                 threads=1):
        """
        Initialize the `GWADataLoader` object with the data sources required for
        downstream statistical genetics analyses.

        :param bed_files: The path to the BED file(s). You may use a wildcard here to read files for multiple
        chromosomes.
        :param phenotype_file: The path to the phenotype file.
        (Default: tab-separated file with `FID IID phenotype` columns).
        :param covariates_file: The path to the covariates file.
        (Default: tab-separated file starting with the `FID IID ...` columns and followed by the covariate columns).
        :param keep_samples: A vector or list of sample IDs to keep when filtering the genotype matrix.
        :param keep_file: A path to a plink-style keep file to select a subset of individuals.
        :param extract_snps: A vector or list of SNP IDs to keep when filtering the genotype matrix.
        :param extract_file: A path to a plink-style extract file to select a subset of SNPs.
        :param min_maf: The minimum minor allele frequency cutoff.
        :param min_mac: The minimum minor allele count cutoff.
        :param drop_duplicated: If True, drop SNPs with duplicated rsIDs.
        :param phenotype_likelihood: The likelihood of the phenotype (e.g. `gaussian`, `binomial`).
        :param sumstats_files: The path to the summary statistics file(s). The path may be a wildcard.
        :param sumstats_format: The format for the summary statistics. Currently supports the following
        formats: `plink1.9`, `plink2`, `magenpy`, `fastGWA`, `COJO`, `SAIGE`, or `GWASCatalog` for the standard
        summary statistics format (also known as `ssf` or `gwas-ssf`).
        :param ld_store_files: The path to the LD matrices. This may be a wildcard to accommodate reading data
        for multiple chromosomes.
        :param annotation_files: The path to the annotation file(s). The path may contain a wildcard.
        :param annotation_format: The format for the annotation file(s). Currently, supports the following
        formats: `magenpy`, `ldsc`.
        :param backend: The backend software used for computations with the genotype matrix. Currently, supports
        `xarray` and `plink`.
        :param temp_dir: The temporary directory where to store intermediate files.
        :param output_dir: The output directory where to store the results of the computation.
        :param verbose: Verbosity of the information printed to standard output.
        :param threads: The number of threads to use for computations.
        """

        # ------- Sanity checks -------

        assert backend in ('xarray', 'plink')
        assert phenotype_likelihood in ('gaussian', 'binomial')

        # ------- General options -------

        self.backend = backend

        self.temp_dir = temp_dir
        self.output_dir = output_dir
        self.cleanup_dir_list = []  # Directories to clean up after execution.

        makedir([temp_dir, output_dir])

        self.verbose = verbose
        self.threads = threads

        # ------- General parameters -------

        self.phenotype_likelihood: str = phenotype_likelihood

        self.genotype: Union[Dict[int, GenotypeMatrix], None] = None
        self.sample_table: Union[SampleTable, None] = None
        self.ld: Union[Dict[int, LDMatrix], None] = None
        self.sumstats_table: Union[Dict[int, SumstatsTable], None] = None
        self.annotation: Union[Dict[int, AnnotationMatrix], None] = None

        # ------- Read data files -------

        self.read_genotypes(bed_files,
                            min_maf=min_maf,
                            min_mac=min_mac,
                            drop_duplicated=drop_duplicated)
        self.read_phenotype(phenotype_file)
        self.read_covariates(covariates_file)
        self.read_ld(ld_store_files)
        self.read_annotations(annotation_files,
                              annot_format=annotation_format)
        self.read_summary_statistics(sumstats_files,
                                     sumstats_format,
                                     drop_duplicated=drop_duplicated)

        # ------- Filter samples or SNPs -------

        if extract_snps is not None or extract_file is not None:
            self.filter_snps(extract_snps=extract_snps, extract_file=extract_file)

        if keep_samples is not None or keep_file is not None:
            self.filter_samples(keep_samples=keep_samples, keep_file=keep_file)

        # ------- Harmonize data sources -------

        self.harmonize_data()

    @property
    def samples(self):
        """
        :return: The list of samples retained in the sample table.
        """
        if self.sample_table is not None:
            return self.sample_table.iid

    @property
    def sample_size(self):
        """
        !!! seealso "See Also"
            * [n][magenpy.GWADataLoader.GWADataLoader.n]

        :return: The number of samples in the genotype matrix.
        """
        if self.sample_table is not None:
            return self.sample_table.n
        elif self.sumstats_table is not None:
            return np.max([np.max(ss.n_per_snp) for ss in self.sumstats_table.values()])
        else:
            raise ValueError("Information about the sample size is not available!")

    @property
    def n(self):
        """
        !!! seealso "See Also"
            * [sample_size][magenpy.GWADataLoader.GWADataLoader.sample_size]

        :return: The number of samples in the genotype matrix.
        """

        return self.sample_size

    @property
    def snps(self):
        """
        :return: The list of SNP rsIDs retained in each chromosome.
        :rtype: dict
        """
        if self.genotype is not None:
            return {c: g.snps for c, g in self.genotype.items()}
        elif self.sumstats_table is not None:
            return {c: s.snps for c, s in self.sumstats_table.items()}
        elif self.ld is not None:
            return {c: l.snps for c, l in self.ld.items()}
        elif self.annotation is not None:
            return {c: a.snps for c, a in self.annotation.items()}
        else:
            raise ValueError("GWADataLoader instance is not properly initialized!")

    @property
    def m(self):
        """
        !!! seealso "See Also"
            * [n_snps][magenpy.GWADataLoader.GWADataLoader.n_snps]

        :return: The number of variants in the harmonized data sources.
        """
        return sum(self.shapes.values())

    @property
    def n_snps(self):
        """
        !!! seealso "See Also"
            * [m][magenpy.GWADataLoader.GWADataLoader.m]

        :return: The number of variants in the harmonized data sources.
        """
        return self.m

    @property
    def shapes(self):
        """
+        :return: A dictionary where the key is the chromosome number and the value is
+        the number of variants on that chromosome.
+        """
+        if self.genotype is not None:
+            return {c: g.shape[1] for c, g in self.genotype.items()}
+        elif self.sumstats_table is not None:
+            return {c: s.shape[0] for c, s in self.sumstats_table.items()}
+        elif self.ld is not None:
+            return {c: l.n_snps for c, l in self.ld.items()}
+        elif self.annotation is not None:
+            return {c: a.shape[0] for c, a in self.annotation.items()}
+        else:
+            raise ValueError("GWADataLoader instance is not properly initialized!")
+
+    @property
+    def chromosomes(self):
+        """
+        :return: The list of chromosomes that were loaded to `GWADataLoader`.
+        """
+        return sorted(list(self.shapes.keys()))
+
+    @property
+    def n_annotations(self):
+        """
+        :return: The number of annotations included in the annotation matrices.
+        """
+        if self.annotation is not None:
+            return self.annotation[self.chromosomes[0]].n_annotations
+
+    def filter_snps(self, extract_snps=None, extract_file=None, chromosome=None):
+        """
+        Filter the SNP set across all the data sources attached to the `GWADataLoader` object.
+        :param extract_snps: A list or array of SNP rsIDs to keep.
+        :param extract_file: A path to a plink-style file with SNP rsIDs to keep.
+        :param chromosome: Chromosome number. If specified, applies the filter to that chromosome only.
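+
+        A usage sketch on an initialized `GWADataLoader` instance `gdl` (the rsIDs are hypothetical):
+
+        >>> gdl.filter_snps(extract_snps=['rs12345', 'rs67890'], chromosome=22)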
+        """
+
+        if extract_snps is None and extract_file is None:
+            return
+
+        if chromosome is not None:
+            chroms = [chromosome]
+        else:
+            chroms = self.chromosomes
+
+        if extract_snps is None:
+            from .parsers.misc_parsers import read_snp_filter_file
+            extract_snps = read_snp_filter_file(extract_file)
+
+        for c in chroms:
+
+            # Filter the genotype matrix:
+            if self.genotype is not None and c in self.genotype:
+                self.genotype[c].filter_snps(extract_snps=extract_snps)
+
+                # If no SNPs remain in the genotype matrix for that chromosome, then remove it:
+                if self.genotype[c].shape[1] < 1:
+                    del self.genotype[c]
+
+            # Filter the summary statistics table:
+            if self.sumstats_table is not None and c in self.sumstats_table:
+                self.sumstats_table[c].filter_snps(extract_snps=extract_snps)
+
+                # If no SNPs remain in the summary statistics table for that chromosome, then remove it:
+                if self.sumstats_table[c].shape[0] < 1:
+                    del self.sumstats_table[c]
+
+            # Filter the LD matrix:
+            if self.ld is not None and c in self.ld:
+                self.ld[c].filter_snps(extract_snps=extract_snps)
+
+                # If no SNPs remain in the LD matrix for that chromosome, then remove it:
+                if self.ld[c].n_snps < 1:
+                    del self.ld[c]
+
+            # Filter the annotation matrix:
+            if self.annotation is not None and c in self.annotation:
+                self.annotation[c].filter_snps(extract_snps=extract_snps)
+
+                if self.annotation[c].shape[0] < 1:
+                    del self.annotation[c]
+
+    def filter_samples(self, keep_samples=None, keep_file=None):
+        """
+        Filter samples from the sample table. The user must specify
+        either a list of samples to keep or the path to a file
+        with the list of samples to keep.
+
+        :param keep_samples: A list or array of sample IDs to keep.
+        :param keep_file: The path to a file with the list of samples to keep.
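+
+        A usage sketch (the keep file path is hypothetical):
+
+        >>> gdl.filter_samples(keep_file='data/keep_samples.txt')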
+        """
+
+        self.sample_table.filter_samples(keep_samples=keep_samples, keep_file=keep_file)
+        self.sync_sample_tables()
+
+    def read_annotations(self, annot_path,
+                         annot_format='magenpy',
+                         parser=None,
+                         **parse_kwargs):
+        """
+        Read the annotation matrix from file. Annotations are a set of features associated
+        with each SNP and are generally represented in table format.
+        Consult the documentation for `AnnotationMatrix` for more details.
+
+        :param annot_path: The path to the annotation file(s). The path may contain a wildcard.
+        :param annot_format: The format for the annotation file(s). Currently supports the following
+         formats: `magenpy`, `ldsc`.
+        :param parser: If the annotation file does not follow any of the formats above, you can create
+        your own parser by inheriting from the base `AnnotationMatrixParser` class and passing it here as an argument.
+        :param parse_kwargs: keyword arguments for the parser. These are mainly parameters that will be passed to
+        `pandas.read_csv` function, such as the delimiter, header information, etc.
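+
+        A usage sketch (the annotation file path is hypothetical):
+
+        >>> gdl.read_annotations('annotations/chr_*.annot', annot_format='ldsc')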
+        """
+
+        if annot_path is None:
+            return
+
+        # Find all the relevant files in the path passed by the user:
+        if not iterable(annot_path):
+            annot_files = get_filenames(annot_path, extension='.annot')
+        else:
+            annot_files = annot_path
+
+        if len(annot_files) < 1:
+            warnings.warn(f"No annotation files were found at: {annot_path}")
+            return
+
+        if self.verbose and len(annot_files) < 2:
+            print("> Reading annotation file...")
+
+        self.annotation = {}
+
+        for annot_file in tqdm(annot_files,
+                               total=len(annot_files),
+                               desc="Reading annotation files",
+                               disable=not self.verbose or len(annot_files) < 2):
+            annot_mat = AnnotationMatrix.from_file(annot_file,
+                                                   annot_format=annot_format,
+                                                   annot_parser=parser,
+                                                   **parse_kwargs)
+            self.annotation[annot_mat.chromosome] = annot_mat
+
+    def read_genotypes(self,
+                       bed_paths,
+                       keep_samples=None,
+                       keep_file=None,
+                       extract_snps=None,
+                       extract_file=None,
+                       min_maf=None,
+                       min_mac=1,
+                       drop_duplicated=True):
+        """
+        Read the genotype matrix and/or associated metadata from plink's BED file format.
+        Consult the documentation for `GenotypeMatrix` for more details.
+
+        :param bed_paths: The path to the BED file(s). You may use a wildcard here to read files for multiple
+        chromosomes.
+        :param keep_samples: A vector or list of sample IDs to keep when filtering the genotype matrix.
+        :param keep_file: A path to a plink-style file containing sample IDs to keep.
+        :param extract_snps: A vector or list of SNP IDs to keep when filtering the genotype matrix.
+        :param extract_file: A path to a plink-style file containing SNP IDs to keep.
+        :param min_maf: The minimum minor allele frequency cutoff.
+        :param min_mac: The minimum minor allele count cutoff.
+        :param drop_duplicated: If True, drop SNPs with duplicated rsID.
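+
+        A usage sketch (the BED file path is hypothetical):
+
+        >>> gdl.read_genotypes('data/chr_*.bed', min_maf=0.01)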
+        """
+
+        if bed_paths is None:
+            return
+
+        # Find all the relevant files in the path passed by the user:
+        if not iterable(bed_paths):
+            bed_files = get_filenames(bed_paths, extension='.bed')
+        else:
+            bed_files = bed_paths
+
+        if len(bed_files) < 1:
+            warnings.warn(f"No BED files were found at: {bed_paths}")
+            return
+
+        # Depending on the backend, select the `GenotypeMatrix` class:
+        if self.backend == 'xarray':
+            gmat_class = xarrayGenotypeMatrix
+        else:
+            gmat_class = plinkBEDGenotypeMatrix
+
+        if self.verbose and len(bed_files) < 2:
+            print("> Reading BED file...")
+
+        self.genotype = {}
+
+        for bfile in tqdm(bed_files,
+                          total=len(bed_files),
+                          desc="Reading BED files",
+                          disable=not self.verbose or len(bed_files) < 2):
+            # Read BED file and update the genotypes dictionary:
+            self.genotype.update(gmat_class.from_file(bfile,
+                                                      temp_dir=self.temp_dir,
+                                                      threads=self.threads).split_by_chromosome())
+
+        # After reading the genotype matrices, apply some standard filters:
+        for i, (c, g) in enumerate(self.genotype.items()):
+
+            # Filter the genotype matrix to keep a subset of samples:
+            if keep_samples or keep_file:
+                g.filter_samples(keep_samples=keep_samples, keep_file=keep_file)
+
+            # Filter the genotype matrix to keep a subset of SNPs:
+            if extract_snps or extract_file:
+                g.filter_snps(extract_snps=extract_snps, extract_file=extract_file)
+
+            # Drop duplicated SNP IDs
+            if drop_duplicated:
+                g.drop_duplicated_snps()
+
+            # Filter SNPs by minor allele frequency and/or count:
+            g.filter_by_allele_frequency(min_maf=min_maf, min_mac=min_mac)
+
+            if i == 0:
+                self.sample_table = g.sample_table
+
+    def read_phenotype(self, phenotype_file, drop_na=True, **read_csv_kwargs):
+        """
+        Read the phenotype file and integrate it with the sample tables and genotype matrices.
+
+        :param phenotype_file: The path to the phenotype file
+        (Default: tab-separated file with `FID IID phenotype` columns). If different, supply
+        details as additional arguments to this function.
+        :param drop_na: Drop samples with missing phenotype information.
+        :param read_csv_kwargs: keyword arguments for the `read_csv` function of `pandas`.
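+
+        A usage sketch (the file path is hypothetical; extra keyword arguments
+        are forwarded to `pandas.read_csv`):
+
+        >>> gdl.read_phenotype('data/pheno.csv', sep=',')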
+        """
+
+        if phenotype_file is None:
+            return
+
+        if self.verbose:
+            print("> Reading phenotype file...")
+
+        assert self.sample_table is not None
+
+        self.sample_table.read_phenotype_file(phenotype_file, drop_na=drop_na, **read_csv_kwargs)
+        self.sync_sample_tables()
+
+    def set_phenotype(self, new_phenotype, phenotype_likelihood=None):
+        """
+        A convenience method to update the phenotype column for the samples.
+        :param new_phenotype: A vector or list of phenotype values.
+        :param phenotype_likelihood: The phenotype likelihood (e.g. `binomial`, `gaussian`). Optional.
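+
+        A usage sketch (`new_pheno` is a hypothetical vector with one entry per sample):
+
+        >>> gdl.set_phenotype(new_pheno, phenotype_likelihood='gaussian')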
+        """
+
+        self.sample_table.set_phenotype(new_phenotype,
+                                        phenotype_likelihood=phenotype_likelihood or self.phenotype_likelihood)
+        self.sync_sample_tables()
+
+    def read_covariates(self, covariates_file, **read_csv_kwargs):
+        """
+        Read the covariates file and integrate it with the sample tables and genotype matrices.
+
+        :param covariates_file: The path to the covariates file
+        (Default: tab-separated file starting with the `FID IID ...` columns and followed by the covariate columns).
+        :param read_csv_kwargs: keyword arguments for the `read_csv` function of `pandas`.
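+
+        A usage sketch (the file path is hypothetical):
+
+        >>> gdl.read_covariates('data/covariates.txt')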
+        """
+
+        if covariates_file is None:
+            return
+
+        if self.verbose:
+            print("> Reading covariates file...")
+
+        assert self.sample_table is not None
+
+        self.sample_table.read_covariates_file(covariates_file, **read_csv_kwargs)
+        self.sync_sample_tables()
+
+    def read_summary_statistics(self,
+                                sumstats_path,
+                                sumstats_format='magenpy',
+                                parser=None,
+                                drop_duplicated=True,
+                                **parse_kwargs):
+        """
+        Read GWAS summary statistics file(s) and parse them to `SumstatsTable` objects.
+
+        :param sumstats_path: The path to the summary statistics file(s). The path may be a wildcard.
+        :param sumstats_format: The format for the summary statistics. Currently supports the following
+         formats: `plink1.9`, `plink2`, `magenpy`, `fastGWA`, `COJO`, `SAIGE`, or `GWASCatalog` for the standard
+         summary statistics format (also known as `ssf` or `gwas-ssf`).
+        :param parser: If the summary statistics file does not follow any of the formats above, you can create
+        your own parser by inheriting from the base `SumstatsParser` class and passing it here as an argument.
+        :param drop_duplicated: Drop SNPs with duplicated rsIDs.
+        :param parse_kwargs: keyword arguments for the parser. These are mainly parameters that will be passed to
+        `pandas.read_csv` function, such as the delimiter, header information, etc.
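+
+        A usage sketch (the file path is hypothetical):
+
+        >>> gdl.read_summary_statistics('data/sumstats_chr_*.txt',
+        ...                             sumstats_format='fastGWA')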
+        """
+
+        if sumstats_path is None:
+            return
+
+        if not iterable(sumstats_path):
+            sumstats_files = get_filenames(sumstats_path)
+
+            from .utils.system_utils import valid_url
+            if len(sumstats_files) < 1 and valid_url(sumstats_path):
+                sumstats_files = [sumstats_path]
+        else:
+            sumstats_files = sumstats_path
+
+        if len(sumstats_files) < 1:
+            warnings.warn(f"No summary statistics files were found at: {sumstats_path}")
+            return
+
+        if self.verbose and len(sumstats_files) < 2:
+            print("> Reading summary statistics file...")
+
+        self.sumstats_table = {}
+
+        for f in tqdm(sumstats_files,
+                      total=len(sumstats_files),
+                      desc="Reading summary statistics files",
+                      disable=not self.verbose or len(sumstats_files) < 2):
+
+            ss_tab = SumstatsTable.from_file(f, sumstats_format=sumstats_format, parser=parser, **parse_kwargs)
+
+            if drop_duplicated:
+                ss_tab.drop_duplicates()
+
+            if 'CHR' in ss_tab.table.columns:
+                self.sumstats_table.update(ss_tab.split_by_chromosome())
+            else:
+                if self.genotype is not None:
+                    ref_table = {c: g.snps for c, g in self.genotype.items()}
+                elif self.ld is not None:
+                    ref_table = {c: ld.snps for c, ld in self.ld.items()}
+                else:
+                    raise ValueError("Cannot index summary statistics tables without chromosome information!")
+
+                self.sumstats_table.update(ss_tab.split_by_chromosome(snps_per_chrom=ref_table))
+
+    def read_ld(self, ld_store_paths):
+        """
+        Read the LD matrix files stored on-disk in Zarr array format.
+        :param ld_store_paths: The path to the LD matrices. This may be a wildcard to accommodate reading data
+        for multiple chromosomes.
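+
+        A usage sketch (the path to the Zarr stores is hypothetical):
+
+        >>> gdl.read_ld('output/ld/chr_*')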
+        """
+
+        if ld_store_paths is None:
+            return
+
+        if not iterable(ld_store_paths):
+            ld_store_files = get_filenames(ld_store_paths, extension='.zgroup')
+        else:
+            ld_store_files = ld_store_paths
+
+        if len(ld_store_files) < 1:
+            warnings.warn(f"No LD matrix files were found at: {ld_store_paths}")
+            return
+
+        if self.verbose and len(ld_store_files) < 2:
+            print("> Reading LD matrix...")
+
+        self.ld = {}
+
+        for f in tqdm(ld_store_files,
+                      total=len(ld_store_files),
+                      desc="Reading LD matrices",
+                      disable=not self.verbose or len(ld_store_files) < 2):
+            z = LDMatrix.from_path(f)
+            self.ld[z.chromosome] = z
+
+    def load_ld(self):
+        """
+        A utility method to load the LD matrices to memory from on-disk storage.
+        """
+        if self.ld is not None:
+            for ld in self.ld.values():
+                ld.load()
+
+    def release_ld(self):
+        """
+        A utility function to release the LD matrices from memory.
+        """
+        if self.ld is not None:
+            for ld in self.ld.values():
+                ld.release()
+
+    def compute_ld(self,
+                   estimator,
+                   output_dir,
+                   dtype='int16',
+                   compressor_name='lz4',
+                   compression_level=5,
+                   **ld_kwargs):
+        """
+        Compute the Linkage-Disequilibrium (LD) matrix or SNP-by-SNP Pearson
+        correlation matrix between genetic variants. This function only considers correlations
+        between SNPs on the same chromosome. This is a utility function that calls the
+        `.compute_ld()` method of the `GenotypeMatrix` objects associated with
+        GWADataLoader.
+
+        :param estimator: The estimator for the LD matrix. We currently support
+        4 different estimators: `sample`, `windowed`, `shrinkage`, and `block`.
+        :param output_dir: The output directory where the Zarr array containing the
+        entries of the LD matrix will be stored.
+        :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+        and integer quantized data types int8 and int16).
+        :param compressor_name: The name of the compression algorithm to use for the LD matrix.
+        :param compression_level: The compression level to use for the entries of the LD matrix (1-9).
+        :param ld_kwargs: keyword arguments for the various LD estimators. Consult
+        the implementations of `WindowedLD`, `ShrinkageLD`, and `BlockLD` for details.
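+
+        A usage sketch with the windowed estimator and default settings
+        (the output path is hypothetical):
+
+        >>> gdl.compute_ld('windowed', output_dir='output/ld/')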
+        """
+
+        if self.verbose and len(self.genotype) < 2:
+            print("> Computing LD matrix...")
+
+        self.ld = {
+            c: g.compute_ld(estimator,
+                            output_dir,
+                            dtype=dtype,
+                            compressor_name=compressor_name,
+                            compression_level=compression_level,
+                            **ld_kwargs)
+            for c, g in tqdm(sorted(self.genotype.items(), key=lambda x: x[0]),
+                             total=len(self.genotype),
+                             desc='Computing LD matrices',
+                             disable=not self.verbose or len(self.genotype) < 2)
+        }
+
+    def get_ld_matrices(self):
+        """
+        :return: The LD matrices computed for each chromosome.
+        """
+        return self.ld
+
+    def harmonize_data(self):
+        """
+        This method ensures that the data sources (reference genotype,
+        LD matrices, summary statistics, annotations) are all aligned in terms of the
+        set of variants that they operate on as well as the designation of the effect allele for
+        each variant.
+
+        !!! note
+            This method is called automatically during the initialization of the `GWADataLoader` object.
+            However, if you read or manipulate the data sources after initialization,
+            you may need to call this method again to ensure that the data sources remain aligned.
+
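+        A usage sketch, re-harmonizing after reading a new data source
+        (the LD store path is hypothetical):
+
+        >>> gdl.read_ld('output/ld/chr_*')
+        >>> gdl.harmonize_data()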
+        """
+
+        data_sources = (self.genotype, self.sumstats_table, self.ld, self.annotation)
+        initialized_data_sources = [ds for ds in data_sources if ds is not None]
+
+        # If fewer than two data sources are present, skip harmonization...
+        if len(initialized_data_sources) < 2:
+            return
+
+        # Get the chromosomes information from all the data sources:
+        chromosomes = list(set.union(*[set(ds.keys()) for ds in initialized_data_sources]))
+
+        if self.verbose and len(chromosomes) < 2:
+            print("> Harmonizing data...")
+
+        for c in tqdm(chromosomes,
+                      total=len(chromosomes),
+                      desc='Harmonizing data',
+                      disable=not self.verbose or len(chromosomes) < 2):
+
+            # Which initialized data sources have information for chromosome `c`
+            miss_chroms = [c not in ds for ds in initialized_data_sources]
+
+            if sum(miss_chroms) > 0:
+                # If the chromosome data only exists for some data sources but not others, remove the chromosome
+                # from all data sources.
+                # Is this the best way to handle the missingness? Should we just post a warning?
+                for ds in initialized_data_sources:
+                    if c in ds:
+                        del ds[c]
+
+            else:
+
+                # Find the set of SNPs that are shared across all data sources:
+                common_snps = np.array(list(set.intersection(*[set(ds[c].snps)
+                                                               for ds in initialized_data_sources])))
+
+                # If necessary, filter the data sources to only have the common SNPs:
+                for ds in initialized_data_sources:
+                    if ds[c].n_snps != len(common_snps):
+                        ds[c].filter_snps(extract_snps=common_snps)
+
+                # Harmonize the summary statistics data with either genotype or LD reference.
+                # This procedure checks for flips in the effect allele between data sources.
+                if self.sumstats_table is not None:
+                    if self.genotype is not None:
+                        self.sumstats_table[c].match(self.genotype[c].get_snp_table(col_subset=['SNP', 'A1', 'A2']))
+                    elif self.ld is not None:
+                        self.sumstats_table[c].match(self.ld[c].to_snp_table(col_subset=['SNP', 'A1', 'A2']))
+
+                    # If during the allele matching process we discover incompatibilities,
+                    # we filter those SNPs:
+                    for ds in initialized_data_sources:
+                        if ds[c].n_snps != self.sumstats_table[c].n_snps:
+                            ds[c].filter_snps(extract_snps=self.sumstats_table[c].snps)
+
+    def perform_gwas(self, **gwa_kwargs):
+        """
+        Perform genome-wide association testing of all variants against the phenotype.
+        This is a utility function that calls the `.perform_gwas()` method of the
+        `GenotypeMatrix` objects associated with GWADataLoader.
+
+        :param gwa_kwargs: Keyword arguments to pass to the GWA functions. Consult stats.gwa.utils
+        for relevant keyword arguments for each backend.
+        """
+
+        if self.verbose and len(self.genotype) < 2:
+            print("> Performing GWAS...")
+
+        self.sumstats_table = {
+            c: g.perform_gwas(**gwa_kwargs)
+            for c, g in tqdm(sorted(self.genotype.items(), key=lambda x: x[0]),
+                             total=len(self.genotype),
+                             desc='Performing GWAS',
+                             disable=not self.verbose or len(self.genotype) < 2)
+        }
+
+    def score(self, beta=None, standardize_genotype=False):
+        """
+        Perform linear scoring, i.e. multiply the genotype matrix by the vector of effect sizes, `beta`.
+
+        :param beta: A dictionary where the keys are the chromosome numbers and the
+        values are a vector of effect sizes for each variant on that chromosome. If the
+        betas are not provided, we use the marginal betas by default (if those are available).
+        :param standardize_genotype: If True, standardize the genotype matrix before scoring.
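+
+        A usage sketch (assumes marginal effect sizes are available in the
+        summary statistics tables):
+
+        >>> pgs = gdl.score()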
+        """
+
+        if beta is None:
+            try:
+                # Note: check for None explicitly; using `or` would trigger ambiguous
+                # truth-value evaluation on numpy arrays.
+                beta = {c: s.marginal_beta if s.marginal_beta is not None else s.get_snp_pseudo_corr()
+                        for c, s in self.sumstats_table.items()}
+            except Exception:
+                raise ValueError("To perform linear scoring, you must provide effect size estimates (BETA)!")
+
+        # Here, we have a very ugly way of accounting for
+        # the fact that the chromosomes may be coded differently between the genotype
+        # and the beta dictionary. Maybe we can find a better solution in the future.
+        common_chr_g, common_chr_b = match_chromosomes(self.genotype.keys(), beta.keys(), return_both=True)
+
+        if len(common_chr_g) < 1:
+            raise ValueError("No common chromosomes found between the genotype and the effect size estimates!")
+
+        if self.verbose and len(common_chr_g) < 2:
+            print("> Generating polygenic scores...")
+
+        pgs = None
+
+        for c_g, c_b in tqdm(zip(common_chr_g, common_chr_b),
+                             total=len(common_chr_g),
+                             desc='Generating polygenic scores',
+                             disable=not self.verbose or len(common_chr_g) < 2):
+
+            if pgs is None:
+                pgs = self.genotype[c_g].score(beta[c_b], standardize_genotype=standardize_genotype)
+            else:
+                pgs += self.genotype[c_g].score(beta[c_b], standardize_genotype=standardize_genotype)
+
+        # If we only have a single set of betas, flatten the PGS vector:
+        if len(pgs.shape) > 1 and pgs.shape[1] == 1:
+            pgs = pgs.flatten()
+
+        return pgs
+
+    def predict(self, beta=None):
+        """
+        Predict the phenotype for the genotyped samples using the provided effect size
+        estimates `beta`. For quantitative traits, this is equivalent to performing
+        linear scoring. For binary phenotypes, we transform the output using the probit link function.
+
+        :param beta: A dictionary where the keys are the chromosome numbers and the
+        values are a vector of effect sizes for each variant on that chromosome. If the
+        betas are not provided, we use the marginal betas by default (if those are available).
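+
+        A usage sketch (uses the marginal betas by default):
+
+        >>> predicted_phenotype = gdl.predict()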
+        """
+
+        # Perform linear scoring:
+        pgs = self.score(beta)
+
+        if self.phenotype_likelihood == 'binomial':
+            # Apply probit link function:
+            from scipy.stats import norm
+            pgs = norm.cdf(pgs)
+
+        return pgs
+
+    def to_individual_table(self):
+        """
+        :return: A plink-style dataframe of individual IDs, in the form of
+        Family ID (FID) and Individual ID (IID).
+        """
+
+        return self.sample_table.get_individual_table()
+
+    def to_phenotype_table(self):
+        """
+        :return: A plink-style dataframe with each individual's Family ID (FID),
+        Individual ID (IID), and phenotype value.
+        """
+
+        return self.sample_table.get_phenotype_table()
+
+    def to_snp_table(self, col_subset=None, per_chromosome=False):
+        """
+        Get a dataframe of SNP data for all variants
+        across different chromosomes.
+
+        :param col_subset: The subset of columns to obtain.
+        :param per_chromosome: If True, returns a dictionary where the key
+        is the chromosome number and the value is the SNP table per
+        chromosome.
+
+        :return: A dataframe (or dictionary of dataframes) of SNP data.
+        """
+
+        snp_tables = {}
+
+        for c in self.chromosomes:
+            if self.sumstats_table is not None:
+                snp_tables[c] = self.sumstats_table[c].to_table(col_subset=col_subset)
+            elif self.genotype is not None:
+                snp_tables[c] = self.genotype[c].get_snp_table(col_subset=col_subset)
+            elif self.ld is not None:
+                snp_tables[c] = self.ld[c].to_snp_table(col_subset=col_subset)
+            else:
+                raise ValueError("GWADataLoader instance is not properly initialized!")
+
+        if per_chromosome:
+            return snp_tables
+        else:
+            return pd.concat(list(snp_tables.values()))
+
+    def to_summary_statistics_table(self, col_subset=None, per_chromosome=False):
+        """
+        Get a dataframe of the GWAS summary statistics for all variants
+        across different chromosomes.
+
+        :param col_subset: The subset of columns (or summary statistics) to obtain.
+        :param per_chromosome: If True, returns a dictionary where the key
+        is the chromosome number and the value is the summary statistics table per
+        chromosome.
+
+        :return: A dataframe (or dictionary of dataframes) of summary statistics.
+        """
+
+        assert self.sumstats_table is not None
+
+        snp_tables = {}
+
+        for c in self.chromosomes:
+            snp_tables[c] = self.sumstats_table[c].to_table(col_subset=col_subset)
+
+        if per_chromosome:
+            return snp_tables
+        else:
+            return pd.concat(list(snp_tables.values()))
+
+    def sync_sample_tables(self):
+        """
+        A utility method to sync the sample tables of the
+        `GenotypeMatrix` objects with the sample table under
+        the `GWADataLoader` object. This is especially important
+        when setting new phenotypes (from the simulators) or reading
+        covariates files, etc.
+        """
+
+        for c, g in self.genotype.items():
+            g.set_sample_table(self.sample_table)
+
+    def split_by_chromosome(self):
+        """
+        A utility method to split a GWADataLoader object by chromosome ID, such that
+        we would have one `GWADataLoader` object per chromosome. The method returns a dictionary
+        where the key is the chromosome number and the value is the `GWADataLoader` object corresponding
+        to that chromosome only.
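+
+        A usage sketch (assumes data for chromosome 22 was loaded):
+
+        >>> per_chromosome = gdl.split_by_chromosome()
+        >>> gdl_chr22 = per_chromosome[22]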
+        """
+
+        if len(self.chromosomes) == 1:
+            return {self.chromosomes[0]: self}
+
+        else:
+            split_dict = {}
+
+            for c in self.chromosomes:
+                split_dict[c] = copy.copy(self)
+
+                if self.genotype is not None and c in self.genotype:
+                    split_dict[c].genotype = {c: self.genotype[c]}
+                if self.sumstats_table is not None and c in self.sumstats_table:
+                    split_dict[c].sumstats_table = {c: self.sumstats_table[c]}
+                if self.ld is not None and c in self.ld:
+                    split_dict[c].ld = {c: self.ld[c]}
+                if self.annotation is not None and c in self.annotation:
+                    split_dict[c].annotation = {c: self.annotation[c]}
+
+            return split_dict
+
+    def split_by_samples(self, proportions=None, groups=None, keep_original=True):
+        """
+        Split the `GWADataLoader` object by samples, if genotype or sample data
+        is available. The user must provide a list or proportion of samples in each split,
+        and the method will return a list of `GWADataLoader` objects with only the samples
+        designated for each split. This may be a useful utility for train/test splits or
+        other downstream tasks.
+
+        :param proportions: A list with the proportion of samples in each split. Must sum to 1.
+        :param groups: A list of lists containing the sample IDs in each split.
+        :param keep_original: If True, keep the original `GWADataLoader` object and do not
+        transform it in the splitting process.
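+
+        A usage sketch for a random 80/20 train/test split:
+
+        >>> train_gdl, test_gdl = gdl.split_by_samples(proportions=[0.8, 0.2])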
+        """
+
+        if self.sample_table is None:
+            raise ValueError("The sample table is not set!")
+
+        if groups is None:
+            if proportions is None:
+                raise ValueError("To split a `GWADataloader` object by samples, the user must provide either the list "
+                                 "or proportion of individuals in each split.")
+            else:
+
+                # Assign each sample to a different split randomly by drawing from a multinomial:
+                random_split = np.random.multinomial(1, proportions, size=self.sample_size).astype(bool)
+                # Extract the individuals in each group from the multinomial sample:
+                groups = [self.samples[random_split[:, i]] for i in range(random_split.shape[1])]
+
+        gdls = []
+        for i, g in enumerate(groups):
+
+            if len(g) < 1:
+                raise ValueError(f"Group {i} is empty! Please ensure that all splits have at least one sample.")
+
+            if (i + 1) == len(groups) and not keep_original:
+                new_gdl = self
+            else:
+                new_gdl = copy.deepcopy(self)
+
+            new_gdl.filter_samples(keep_samples=g)
+
+            gdls.append(new_gdl)
+
+        return gdls
+
+    def align_with(self, other_gdls, axis='SNP', how='inner'):
+        """
+        Align the `GWADataLoader` object with other GDL objects to have the same
+        set of SNPs or samples. This utility method is meant to enable the user to
+        align multiple data sources for downstream analyses.
+
+        :param other_gdls: A `GWADataLoader` or list of `GWADataLoader` objects.
+        :param axis: The axis on which to perform the alignment (can be `sample` for aligning individuals or
+        `SNP` for aligning variants across the datasets).
+        :param how: The type of join to perform across the datasets. For now, only an inner
+        join is supported.
+
+        !!! warning
+            Experimental for now, would like to add more features here in the near future.
+
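+        A usage sketch (`other_gdl` is a hypothetical second `GWADataLoader` instance):
+
+        >>> gdl.align_with(other_gdl, axis='SNP')
+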
+        """
+
+        if isinstance(other_gdls, GWADataLoader):
+            other_gdls = [other_gdls]
+
+        assert all([isinstance(gdl, GWADataLoader) for gdl in other_gdls])
+
+        if axis == 'SNP':
+            # Ensure that all the GDLs have the same set of SNPs.
+            # This may be useful if the goal is to select a common set of variants
+            # that are shared across different datasets.
+            for c in self.chromosomes:
+                common_snps = set(self.snps[c])
+                for gdl in other_gdls:
+                    common_snps = common_snps.intersection(set(gdl.snps[c]))
+
+                common_snps = np.array(list(common_snps))
+
+                for gdl in other_gdls:
+                    gdl.filter_snps(extract_snps=common_snps, chromosome=c)
+
+                self.filter_snps(extract_snps=common_snps, chromosome=c)
+
+        elif axis == 'sample':
+            # Ensure that all the GDLs have the same set of samples.
+            # This may be useful when different GDLs have different covariates, phenotypes,
+            # or other information pertaining to the individuals.
+
+            common_samples = set(self.samples)
+
+            for gdl in other_gdls:
+                common_samples = common_samples.intersection(set(gdl.samples))
+
+            common_samples = np.array(list(common_samples))
+
+            for gdl in other_gdls:
+                gdl.filter_samples(keep_samples=common_samples)
+
+            self.filter_samples(keep_samples=common_samples)
+
+        else:
+            raise KeyError("Alignment axis can only be either 'SNP' or 'sample'!")
+
+    def cleanup(self):
+        """
+        Clean up all temporary files and directories.
+        """
+        if self.verbose:
+            print("> Cleaning up workspace.")
+
+        for tmpdir in self.cleanup_dir_list:
+            try:
+                tmpdir.cleanup()
+            except FileNotFoundError:
+                continue
+
+        # Clean up the temporary files associated with the genotype matrices:
+        if self.genotype is not None:
+            for g in self.genotype.values():
+                g.cleanup()
+
+        # Release the LD data from memory:
+        self.release_ld()
+
+
+ + + +
+ + + + + + + +
+ + + +

+ chromosomes + + + property + + +

+ + +
+ + + + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

The list of chromosomes that were loaded to GWADataLoader.

+
+
+
+ +
+ +
+ + + +

+ m + + + property + + +

+ + +
+ +
+

See Also

+ +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

The number of variants in the harmonized data sources.

+
+
+
+ +
+ +
+ + + +

+ n + + + property + + +

+ + +
+ +
+

See Also

+ +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

The number of samples in the genotype matrix.

+
+
+
+ +
+ +
+ + + +

+ n_annotations + + + property + + +

+ + +
+ + + + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

The number of annotations included in the annotation matrices.

+
+
+
+ +
+ +
+ + + +

+ n_snps + + + property + + +

+ + +
+ +
+

See Also

+
    +
  • m
  • +
+
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

The number of variants in the harmonized data sources.

+
+
+
+ +
+ +
+ + + +

+ sample_size + + + property + + +

+ + +
+ +
+

See Also

+
    +
  • n
  • +
+
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

The number of samples in the genotype matrix.

+
+
+
+ +
+ +
+ + + +

+ samples + + + property + + +

+ + +
+ + + + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

The list of samples retained in the sample table.

+
+
+
+ +
+ +
+ + + +

+ shapes + + + property + + +

+ + +
+ + + + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

A dictionary where the key is the chromosome number and the value is the number of variants on that chromosome.

+
+
+
+ +
+ +
+ + + +

+ snps + + + property + + +

+ + +
+ + + + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ dict + +
+

The list of SNP rsIDs retained in each chromosome.

+
+
+
+ +
+ + + + +
+ + + +

+ __init__(bed_files=None, phenotype_file=None, covariates_file=None, keep_samples=None, keep_file=None, extract_snps=None, extract_file=None, min_maf=None, min_mac=None, drop_duplicated=True, phenotype_likelihood='gaussian', sumstats_files=None, sumstats_format='magenpy', ld_store_files=None, annotation_files=None, annotation_format='magenpy', backend='xarray', temp_dir='temp', output_dir='output', verbose=True, threads=1) + +

+ + +
+ +

Initialize the GWADataLoader object with the data sources required for +downstream statistical genetics analyses.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
bed_files + +
+

The path to the BED file(s). You may use a wildcard here to read files for multiple chromosomes.

+
+
+ None +
phenotype_file + +
+

The path to the phenotype file. (Default: tab-separated file with FID IID phenotype columns).

+
+
+ None +
covariates_file + +
+

The path to the covariates file. (Default: tab-separated file starting with the FID IID ... columns and followed by the covariate columns).

+
+
+ None +
keep_samples + +
+

A vector or list of sample IDs to keep when filtering the genotype matrix.

+
+
+ None +
keep_file + +
+

A path to a plink-style keep file to select a subset of individuals.

+
+
+ None +
extract_snps + +
+

A vector or list of SNP IDs to keep when filtering the genotype matrix.

+
+
+ None +
extract_file + +
+

A path to a plink-style extract file to select a subset of SNPs.

+
+
+ None +
min_maf + +
+

The minimum minor allele frequency cutoff.

+
+
+ None +
min_mac + +
+

The minimum minor allele count cutoff.

+
+
+ None +
drop_duplicated + +
+

If True, drop SNPs with duplicated rsID.

+
+
+ True +
phenotype_likelihood + +
+

The likelihood of the phenotype (e.g. gaussian, binomial).

+
+
+ 'gaussian' +
sumstats_files + +
+

The path to the summary statistics file(s). The path may be a wildcard.

+
+
+ None +
sumstats_format + +
+

The format for the summary statistics. Currently supports the following formats: plink1.9, plink2, magenpy, fastGWA, COJO, SAIGE, or GWASCatalog for the standard summary statistics format (also known as ssf or gwas-ssf).

+
+
+ 'magenpy' +
ld_store_files + +
+

The path to the LD matrices. This may be a wildcard to accommodate reading data for multiple chromosomes.

+
+
+ None +
annotation_files + +
+

The path to the annotation file(s). The path may contain a wildcard.

+
+
+ None +
annotation_format + +
+

The format for the summary statistics. Currently, supports the following formats: magenpy, ldsc.

+
+
+ 'magenpy' +
backend + +
+

The backend software used for computations with the genotype matrix. Currently, supports xarray and plink.

+
+
+ 'xarray' +
temp_dir + +
+

The temporary directory where to store intermediate files.

+
+
+ 'temp' +
output_dir + +
+

The output directory where to store the results of the computation.

+
+
+ 'output' +
verbose + +
+

Verbosity of the information printed to standard output.

+
+
+ True +
threads + +
+

The number of threads to use for computations.

+
+
+ 1 +
+ +
+ Source code in magenpy/GWADataLoader.py +
def __init__(self,
+             bed_files=None,
+             phenotype_file=None,
+             covariates_file=None,
+             keep_samples=None,
+             keep_file=None,
+             extract_snps=None,
+             extract_file=None,
+             min_maf=None,
+             min_mac=None,
+             drop_duplicated=True,
+             phenotype_likelihood='gaussian',
+             sumstats_files=None,
+             sumstats_format='magenpy',
+             ld_store_files=None,
+             annotation_files=None,
+             annotation_format='magenpy',
+             backend='xarray',
+             temp_dir='temp',
+             output_dir='output',
+             verbose=True,
+             threads=1):
+    """
+    Initialize the `GWADataLoader` object with the data sources required for
+    downstream statistical genetics analyses.
+
+    :param bed_files: The path to the BED file(s). You may use a wildcard here to read files for multiple
+    chromosomes.
+    :param phenotype_file: The path to the phenotype file.
+    (Default: tab-separated file with `FID IID phenotype` columns).
+    :param covariates_file: The path to the covariates file.
+    (Default: tab-separated file starting with the `FID IID ...` columns and followed by the covariate columns).
+    :param keep_samples: A vector or list of sample IDs to keep when filtering the genotype matrix.
+    :param keep_file: A path to a plink-style keep file to select a subset of individuals.
+    :param extract_snps: A vector or list of SNP IDs to keep when filtering the genotype matrix.
+    :param extract_file: A path to a plink-style extract file to select a subset of SNPs.
+    :param min_maf: The minimum minor allele frequency cutoff.
+    :param min_mac: The minimum minor allele count cutoff.
+    :param drop_duplicated: If True, drop SNPs with duplicated rsID.
+    :param phenotype_likelihood: The likelihood of the phenotype (e.g. `gaussian`, `binomial`).
+    :param sumstats_files: The path to the summary statistics file(s). The path may be a wildcard.
+    :param sumstats_format: The format for the summary statistics. Currently supports the following
+    formats: `plink1.9`, `plink2`, `magenpy`, `fastGWA`, `COJO`, `SAIGE`, or `GWASCatalog` for the standard
+    summary statistics format (also known as `ssf` or `gwas-ssf`).
+    :param ld_store_files: The path to the LD matrices. This may be a wildcard to accommodate reading data
+    for multiple chromosomes.
+    :param annotation_files: The path to the annotation file(s). The path may contain a wildcard.
+    :param annotation_format: The format for the summary statistics. Currently, supports the following
+    formats: `magenpy`, `ldsc`.
+    :param backend: The backend software used for computations with the genotype matrix. Currently, supports
+    `xarray` and `plink`.
+    :param temp_dir: The temporary directory where to store intermediate files.
+    :param output_dir: The output directory where to store the results of the computation.
+    :param verbose: Verbosity of the information printed to standard output.
+    :param threads: The number of threads to use for computations.
+    """
+
+    # ------- Sanity checks -------
+
+    assert backend in ('xarray', 'plink')
+    assert phenotype_likelihood in ('gaussian', 'binomial')
+
+    # ------- General options -------
+
+    self.backend = backend
+
+    self.temp_dir = temp_dir
+    self.output_dir = output_dir
+    self.cleanup_dir_list = []  # Directories to clean up after execution.
+
+    makedir([temp_dir, output_dir])
+
+    self.verbose = verbose
+    self.threads = threads
+
+    # ------- General parameters -------
+
+    self.phenotype_likelihood: str = phenotype_likelihood
+
+    self.genotype: Union[Dict[int, GenotypeMatrix], None] = None
+    self.sample_table: Union[SampleTable, None] = None
+    self.ld: Union[Dict[int, LDMatrix], None] = None
+    self.sumstats_table: Union[Dict[int, SumstatsTable], None] = None
+    self.annotation: Union[Dict[int, AnnotationMatrix], None] = None
+
+    # ------- Read data files -------
+
+    self.read_genotypes(bed_files,
+                        min_maf=min_maf,
+                        min_mac=min_mac,
+                        drop_duplicated=drop_duplicated)
+    self.read_phenotype(phenotype_file)
+    self.read_covariates(covariates_file)
+    self.read_ld(ld_store_files)
+    self.read_annotations(annotation_files,
+                          annot_format=annotation_format)
+    self.read_summary_statistics(sumstats_files,
+                                 sumstats_format,
+                                 drop_duplicated=drop_duplicated)
+
+    # ------- Filter samples or SNPs -------
+
+    if extract_snps is not None or extract_file is not None:
+        self.filter_snps(extract_snps=extract_snps, extract_file=extract_file)
+
+    if keep_samples is not None or keep_file is not None:
+        self.filter_samples(keep_samples=keep_samples, keep_file=keep_file)
+
+    # ------- Harmonize data sources -------
+
+    self.harmonize_data()
+
+
+
+ +
+ + +
+ + + +

+ align_with(other_gdls, axis='SNP', how='inner') + +

+ + +
+ +

Align the GWADataLoader object with other GDL objects to have the same +set of SNPs or samples. This utility method is meant to enable the user to +align multiple data sources for downstream analyses.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
other_gdls + +
+

A GWADataLoader or list of GWADataLoader objects.

+
+
+ required +
axis + +
+

The axis on which to perform the alignment (can be sample for aligning individuals or SNP for aligning variants across the datasets).

+
+
+ 'SNP' +
how + +
+

The type of join to perform across the datasets. For now, we support an inner join sort of operation. !!! warning Experimental for now, would like to add more features here in the near future.

+
+
+ 'inner' +
+ +
+ Source code in magenpy/GWADataLoader.py +
def align_with(self, other_gdls, axis='SNP', how='inner'):
+    """
+    Align the `GWADataLoader` object with other GDL objects to have the same
+    set of SNPs or samples. This utility method is meant to enable the user to
+    align multiple data sources for downstream analyses.
+
+    :param other_gdls: A `GWADataLoader` or list of `GWADataLoader` objects.
+    :param axis: The axis on which to perform the alignment (can be `sample` for aligning individuals or
+    `SNP` for aligning variants across the datasets).
+    :param how: The type of join to perform across the datasets. For now, we support an inner join sort
+    of operation.
+
+    !!! warning
+        Experimental for now, would like to add more features here in the near future.
+
+    """
+
+    if isinstance(other_gdls, GWADataLoader):
+        other_gdls = [other_gdls]
+
+    assert all([isinstance(gdl, GWADataLoader) for gdl in other_gdls])
+
+    if axis == 'SNP':
+        # Ensure that all the GDLs have the same set of SNPs.
+        # This may be useful if the goal is to select a common set of variants
+        # that are shared across different datasets.
+        for c in self.chromosomes:
+            common_snps = set(self.snps[c])
+            for gdl in other_gdls:
+                common_snps = common_snps.intersection(set(gdl.snps[c]))
+
+            common_snps = np.array(list(common_snps))
+
+            for gdl in other_gdls:
+                gdl.filter_snps(extract_snps=common_snps, chromosome=c)
+
+            self.filter_snps(extract_snps=common_snps, chromosome=c)
+
+    elif axis == 'sample':
+        # Ensure that all the GDLs have the same set of samples.
+        # This may be useful when different GDLs have different covariates, phenotypes,
+        # or other information pertaining to the individuals.
+
+        common_samples = set(self.samples)
+
+        for gdl in other_gdls:
+            common_samples = common_samples.intersection(set(gdl.samples))
+
+        common_samples = np.array(list(common_samples))
+
+        for gdl in other_gdls:
+            gdl.filter_samples(keep_samples=common_samples)
+
+        self.filter_samples(keep_samples=common_samples)
+
+    else:
+        raise KeyError("Alignment axis can only be either 'SNP' or 'sample'!")
+
+
+
+ +
+ + +
+ + + +

+ cleanup() + +

+ + +
+ +

Clean up all temporary files and directories

+ +
+ Source code in magenpy/GWADataLoader.py +
def cleanup(self):
+    """
+    Clean up all temporary files and directories
+    """
+    if self.verbose:
+        print("> Cleaning up workspace.")
+
+    for tmpdir in self.cleanup_dir_list:
+        try:
+            tmpdir.cleanup()
+        except FileNotFoundError:
+            continue
+
+    # Clean up the temporary files associated with the genotype matrices:
+    if self.genotype is not None:
+        for g in self.genotype.values():
+            g.cleanup()
+
+    # Release the LD data from memory:
+    self.release_ld()
+
+
+
+ +
+ + +
+ + + +

+ compute_ld(estimator, output_dir, dtype='int16', compressor_name='lz4', compression_level=5, **ld_kwargs) + +

+ + +
+ +

Compute the Linkage-Disequilibrium (LD) matrix or SNP-by-SNP Pearson +correlation matrix between genetic variants. This function only considers correlations +between SNPs on the same chromosome. This is a utility function that calls the +.compute_ld() method of the GenotypeMatrix objects associated with +GWADataLoader.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
estimator + +
+

The estimator for the LD matrix. We currently support 4 different estimators: sample, windowed, shrinkage, and block.

+
+
+ required +
output_dir + +
+

The output directory where the Zarr array containing the entries of the LD matrix will be stored.

+
+
+ required +
dtype + +
+

The data type for the entries of the LD matrix (supported data types are float32, float64 and integer quantized data types int8 and int16).

+
+
+ 'int16' +
compressor_name + +
+

The name of the compression algorithm to use for the LD matrix.

+
+
+ 'lz4' +
compression_level + +
+

The compression level to use for the entries of the LD matrix (1-9).

+
+
+ 5 +
ld_kwargs + +
+

keyword arguments for the various LD estimators. Consult the implementations of WindowedLD, ShrinkageLD, and BlockLD for details.

+
+
+ {} +
+ +
+ Source code in magenpy/GWADataLoader.py +
def compute_ld(self,
+               estimator,
+               output_dir,
+               dtype='int16',
+               compressor_name='lz4',
+               compression_level=5,
+               **ld_kwargs):
+    """
+    Compute the Linkage-Disequilibrium (LD) matrix or SNP-by-SNP Pearson
+    correlation matrix between genetic variants. This function only considers correlations
+    between SNPs on the same chromosome. This is a utility function that calls the
+    `.compute_ld()` method of the `GenotypeMatrix` objects associated with
+    GWADataLoader.
+
+    :param estimator: The estimator for the LD matrix. We currently support
+    4 different estimators: `sample`, `windowed`, `shrinkage`, and `block`.
+    :param output_dir: The output directory where the Zarr array containing the
+    entries of the LD matrix will be stored.
+    :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+    and integer quantized data types int8 and int16).
+    :param compressor_name: The name of the compression algorithm to use for the LD matrix.
+    :param compression_level: The compression level to use for the entries of the LD matrix (1-9).
+    :param ld_kwargs: keyword arguments for the various LD estimators. Consult
+    the implementations of `WindowedLD`, `ShrinkageLD`, and `BlockLD` for details.
+    """
+
+    if self.verbose and len(self.genotype) < 2:
+        print("> Computing LD matrix...")
+
+    self.ld = {
+        c: g.compute_ld(estimator,
+                        output_dir,
+                        dtype=dtype,
+                        compressor_name=compressor_name,
+                        compression_level=compression_level,
+                        **ld_kwargs)
+        for c, g in tqdm(sorted(self.genotype.items(), key=lambda x: x[0]),
+                         total=len(self.genotype),
+                         desc='Computing LD matrices',
+                         disable=not self.verbose or len(self.genotype) < 2)
+    }
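A minimal usage sketch (file paths are placeholders, and the window_size keyword is an assumption; consult the WindowedLD implementation for its exact keyword arguments):

    import magenpy as mgp

    gdl = mgp.GWADataLoader()              # assuming a bare constructor is valid
    gdl.read_genotypes("data/chr_22.bed")  # placeholder path to a plink BED file
    # Compute LD with the windowed estimator; matrices are stored as Zarr arrays:
    ld_mats = gdl.compute_ld('windowed',
                             output_dir='output/ld/',
                             window_size=100)  # estimator-specific keyword (assumed)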
filter_samples(keep_samples=None, keep_file=None)

Filter samples from the samples table. User must specify either a list of samples to keep or the path to a file with the list of samples to keep.

Parameters:

- keep_samples: A list or array of sample IDs to keep. (default: None)
- keep_file: The path to a file with the list of samples to keep. (default: None)

Source code in magenpy/GWADataLoader.py:
def filter_samples(self, keep_samples=None, keep_file=None):
+    """
+    Filter samples from the samples table. User must specify
+    either a list of samples to keep or the path to a file
+    with the list of samples to keep.
+
+    :param keep_samples: A list or array of sample IDs to keep.
+    :param keep_file: The path to a file with the list of samples to keep.
+    """
+
+    self.sample_table.filter_samples(keep_samples=keep_samples, keep_file=keep_file)
+    self.sync_sample_tables()
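For instance (the sample IDs below are hypothetical):

    # Keep only the listed individuals; sample tables are re-synced automatically:
    gdl.filter_samples(keep_samples=['HG00096', 'HG00097'])
    # Alternatively, read the IDs to keep from a file:
    gdl.filter_samples(keep_file='data/keep_samples.txt')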
filter_snps(extract_snps=None, extract_file=None, chromosome=None)

Filter the SNP set from all the data sources attached to the GWADataLoader object.

Parameters:

- extract_snps: A list or array of SNP rsIDs to keep. (default: None)
- extract_file: A path to a plink-style file with SNP rsIDs to keep. (default: None)
- chromosome: Chromosome number. If specified, applies the filter to that chromosome only. (default: None)

Source code in magenpy/GWADataLoader.py:
def filter_snps(self, extract_snps=None, extract_file=None, chromosome=None):
+    """
+    Filter the SNP set from all the GWADataLoader objects.
+    :param extract_snps: A list or array of SNP rsIDs to keep.
+    :param extract_file: A path to a plink-style file with SNP rsIDs to keep.
+    :param chromosome: Chromosome number. If specified, applies the filter to that chromosome only.
+    """
+
+    if extract_snps is None and extract_file is None:
+        return
+
+    if chromosome is not None:
+        chroms = [chromosome]
+    else:
+        chroms = self.chromosomes
+
+    if extract_snps is None:
+        from .parsers.misc_parsers import read_snp_filter_file
+        extract_snps = read_snp_filter_file(extract_file)
+
+    for c in chroms:
+
+        # Filter the genotype matrix:
+        if self.genotype is not None and c in self.genotype:
+            self.genotype[c].filter_snps(extract_snps=extract_snps)
+
+            # If no SNPs remain in the genotype matrix for that chromosome, then remove it:
+            if self.genotype[c].shape[1] < 1:
+                del self.genotype[c]
+
+        # Filter the summary statistics table:
+        if self.sumstats_table is not None and c in self.sumstats_table:
+            self.sumstats_table[c].filter_snps(extract_snps=extract_snps)
+
+            # If no SNPs remain in the summary statistics table for that chromosome, then remove it:
+            if self.sumstats_table[c].shape[0] < 1:
+                del self.sumstats_table[c]
+
+        if self.ld is not None and c in self.ld:
+            self.ld[c].filter_snps(extract_snps=extract_snps)
+
+            # If no SNPs remain in the LD matrix for that chromosome, then remove it:
+            if self.ld[c].n_snps < 1:
+                del self.ld[c]
+
+        # Filter the annotation matrix:
+        if self.annotation is not None and c in self.annotation:
+            self.annotation[c].filter_snps(extract_snps=extract_snps)
+
+            if self.annotation[c].shape[0] < 1:
+                del self.annotation[c]
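A short sketch (the rsIDs and chromosome number are hypothetical):

    # Restrict all data sources to a subset of variants on chromosome 22:
    gdl.filter_snps(extract_snps=['rs9610458', 'rs131538'], chromosome=22)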
get_ld_matrices()

Returns:

- The LD matrices computed for each chromosome.

Source code in magenpy/GWADataLoader.py:
def get_ld_matrices(self):
+    """
+    :return: The LD matrices computed for each chromosome.
+    """
+    return self.ld
harmonize_data()

This method ensures that the data sources (reference genotype, LD matrices, summary statistics, annotations) are all aligned in terms of the set of variants that they operate on as well as the designation of the effect allele for each variant.

Note: This method is called automatically during the initialization of the GWADataLoader object. However, if you read or manipulate the data sources after initialization, you may need to call this method again to ensure that the data sources remain aligned.

Source code in magenpy/GWADataLoader.py:
def harmonize_data(self):
+    """
+    This method ensures that the data sources (reference genotype,
+    LD matrices, summary statistics, annotations) are all aligned in terms of the
+    set of variants that they operate on as well as the designation of the effect allele for
+    each variant.
+
+    !!! note
+        This method is called automatically during the initialization of the `GWADataLoader` object.
+        However, if you read or manipulate the data sources after initialization,
+        you may need to call this method again to ensure that the data sources remain aligned.
+
+    """
+
+    data_sources = (self.genotype, self.sumstats_table, self.ld, self.annotation)
+    initialized_data_sources = [ds for ds in data_sources if ds is not None]
+
+    # If less than two data sources are present, skip harmonization...
+    if len(initialized_data_sources) < 2:
+        return
+
+    # Get the chromosomes information from all the data sources:
+    chromosomes = list(set.union(*[set(ds.keys()) for ds in initialized_data_sources]))
+
+    if self.verbose and len(chromosomes) < 2:
+        print("> Harmonizing data...")
+
+    for c in tqdm(chromosomes,
+                  total=len(chromosomes),
+                  desc='Harmonizing data',
+                  disable=not self.verbose or len(chromosomes) < 2):
+
+        # Which initialized data sources have information for chromosome `c`
+        miss_chroms = [c not in ds for ds in initialized_data_sources]
+
+        if sum(miss_chroms) > 0:
+            # If the chromosome data only exists for some data sources but not others, remove the chromosome
+            # from all data sources.
+            # Is this the best way to handle the missingness? Should we just post a warning?
+            for ds in initialized_data_sources:
+                if c in ds:
+                    del ds[c]
+
+        else:
+
+            # Find the set of SNPs that are shared across all data sources:
+            common_snps = np.array(list(set.intersection(*[set(ds[c].snps)
+                                                           for ds in initialized_data_sources])))
+
+            # If necessary, filter the data sources to only have the common SNPs:
+            for ds in initialized_data_sources:
+                if ds[c].n_snps != len(common_snps):
+                    ds[c].filter_snps(extract_snps=common_snps)
+
+            # Harmonize the summary statistics data with either genotype or LD reference.
+            # This procedure checks for flips in the effect allele between data sources.
+            if self.sumstats_table is not None:
+                if self.genotype is not None:
+                    self.sumstats_table[c].match(self.genotype[c].get_snp_table(col_subset=['SNP', 'A1', 'A2']))
+                elif self.ld is not None:
+                    self.sumstats_table[c].match(self.ld[c].to_snp_table(col_subset=['SNP', 'A1', 'A2']))
+
+                # If during the allele matching process we discover incompatibilities,
+                # we filter those SNPs:
+                for ds in initialized_data_sources:
+                    if ds[c].n_snps != self.sumstats_table[c].n_snps:
+                        ds[c].filter_snps(extract_snps=self.sumstats_table[c].snps)
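A sketch of when you might call this manually (my_snp_subset is a hypothetical array of rsIDs):

    # Filtering one data source directly leaves the others out of sync:
    gdl.sumstats_table[22].filter_snps(extract_snps=my_snp_subset)
    # Re-align the variant sets and effect alleles across all sources:
    gdl.harmonize_data()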
load_ld()

A utility method to load the LD matrices to memory from on-disk storage.

Source code in magenpy/GWADataLoader.py:
def load_ld(self):
+    """
+    A utility method to load the LD matrices to memory from on-disk storage.
+    """
+    if self.ld is not None:
+        for ld in self.ld.values():
+            ld.load()
perform_gwas(**gwa_kwargs)

Perform genome-wide association testing of all variants against the phenotype. This is a utility function that calls the .perform_gwas() method of the GenotypeMatrix objects associated with GWADataLoader.

Parameters:

- gwa_kwargs: Keyword arguments to pass to the GWA functions. Consult stats.gwa.utils for relevant keyword arguments for each backend. (default: {})

Source code in magenpy/GWADataLoader.py:
def perform_gwas(self, **gwa_kwargs):
+    """
+    Perform genome-wide association testing of all variants against the phenotype.
+    This is a utility function that calls the `.perform_gwas()` method of the
+    `GenotypeMatrix` objects associated with GWADataLoader.
+
+    :param gwa_kwargs: Keyword arguments to pass to the GWA functions. Consult stats.gwa.utils
+    for relevant keyword arguments for each backend.
+    """
+
+    if self.verbose and len(self.genotype) < 2:
+        print("> Performing GWAS...")
+
+    self.sumstats_table = {
+        c: g.perform_gwas(**gwa_kwargs)
+        for c, g in tqdm(sorted(self.genotype.items(), key=lambda x: x[0]),
+                         total=len(self.genotype),
+                         desc='Performing GWAS',
+                         disable=not self.verbose or len(self.genotype) < 2)
+    }
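For example, assuming genotypes have already been read (the phenotype path is a placeholder):

    gdl.read_phenotype("data/height.txt")  # placeholder path
    gdl.perform_gwas()                     # populates gdl.sumstats_table per chromosome
    sumstats_df = gdl.to_summary_statistics_table()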
predict(beta=None)

Predict the phenotype for the genotyped samples using the provided effect size estimates beta. For quantitative traits, this is equivalent to performing linear scoring. For binary phenotypes, we transform the output using the probit link function.

Parameters:

- beta: A dictionary where the keys are the chromosome numbers and the values are a vector of effect sizes for each variant on that chromosome. If the betas are not provided, we use the marginal betas by default (if those are available). (default: None)

Source code in magenpy/GWADataLoader.py:
def predict(self, beta=None):
+    """
+    Predict the phenotype for the genotyped samples using the provided effect size
+    estimates `beta`. For quantitative traits, this is equivalent to performing
+    linear scoring. For binary phenotypes, we transform the output using probit link function.
+
+    :param beta: A dictionary where the keys are the chromosome numbers and the
+    values are a vector of effect sizes for each variant on that chromosome. If the
+    betas are not provided, we use the marginal betas by default (if those are available).
+    """
+
+    # Perform linear scoring:
+    pgs = self.score(beta)
+
+    if self.phenotype_likelihood == 'binomial':
+        # Apply probit link function:
+        from scipy.stats import norm
+        pgs = norm.cdf(pgs)
+
+    return pgs
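A minimal sketch, assuming marginal effect sizes are available in the attached summary statistics tables:

    # For binary traits, the linear scores are passed through the probit link:
    predicted = gdl.predict()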
read_annotations(annot_path, annot_format='magenpy', parser=None, **parse_kwargs)

Read the annotation matrix from file. Annotations are a set of features associated with each SNP and are generally represented in table format. Consult the documentation for AnnotationMatrix for more details.

Parameters:

- annot_path: The path to the annotation file(s). The path may contain a wildcard. (required)
- annot_format: The format for the annotation file. Currently supports the following formats: magenpy, ldsc. (default: 'magenpy')
- parser: If the annotation file does not follow any of the formats above, you can create your own parser by inheriting from the base AnnotationMatrixParser class and passing it here as an argument. (default: None)
- parse_kwargs: Keyword arguments for the parser. These are mainly parameters that will be passed to the pandas.read_csv function, such as the delimiter, header information, etc. (default: {})

Source code in magenpy/GWADataLoader.py:
def read_annotations(self, annot_path,
+                     annot_format='magenpy',
+                     parser=None,
+                     **parse_kwargs):
+    """
+    Read the annotation matrix from file. Annotations are a set of features associated
+    with each SNP and are generally represented in table format.
+    Consult the documentation for `AnnotationMatrix` for more details.
+
+    :param annot_path: The path to the annotation file(s). The path may contain a wildcard.
+    :param annot_format: The format for the annotation file. Currently supports the following
+     formats: `magenpy`, `ldsc`.
+    :param parser: If the annotation file does not follow any of the formats above, you can create
+    your own parser by inheriting from the base `AnnotationMatrixParser` class and passing it here as an argument.
+    :param parse_kwargs: keyword arguments for the parser. These are mainly parameters that will be passed to
+    `pandas.read_csv` function, such as the delimiter, header information, etc.
+    """
+
+    if annot_path is None:
+        return
+
+    # Find all the relevant files in the path passed by the user:
+    if not iterable(annot_path):
+        annot_files = get_filenames(annot_path, extension='.annot')
+    else:
+        annot_files = annot_path
+
+    if len(annot_files) < 1:
+        warnings.warn(f"No annotation files were found at: {annot_path}")
+        return
+
+    if self.verbose and len(annot_files) < 2:
+        print("> Reading annotation file...")
+
+    self.annotation = {}
+
+    for annot_file in tqdm(annot_files,
+                           total=len(annot_files),
+                           desc="Reading annotation files",
+                           disable=not self.verbose or len(annot_files) < 2):
+        annot_mat = AnnotationMatrix.from_file(annot_file,
+                                               annot_format=annot_format,
+                                               annot_parser=parser,
+                                               **parse_kwargs)
+        self.annotation[annot_mat.chromosome] = annot_mat
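For example (the wildcard path is a placeholder; ldsc is one of the supported formats):

    gdl.read_annotations("data/annotations/chr_*.annot", annot_format='ldsc')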
read_covariates(covariates_file, **read_csv_kwargs)

Read the covariates file and integrate it with the sample tables and genotype matrices.

Parameters:

- covariates_file: The path to the covariates file (default: a tab-separated file starting with the FID IID ... columns and followed by the covariate columns). (required)
- read_csv_kwargs: Keyword arguments for the read_csv function of pandas. (default: {})

Source code in magenpy/GWADataLoader.py:
def read_covariates(self, covariates_file, **read_csv_kwargs):
+    """
+    Read the covariates file and integrate it with the sample tables and genotype matrices.
+
+    :param covariates_file: The path to the covariates file
+    (Default: tab-separated file starting with the `FID IID ...` columns and followed by the covariate columns).
+    :param read_csv_kwargs: keyword arguments for the `read_csv` function of `pandas`.
+    """
+
+    if covariates_file is None:
+        return
+
+    if self.verbose:
+        print("> Reading covariates file...")
+
+    assert self.sample_table is not None
+
+    self.sample_table.read_covariates_file(covariates_file, **read_csv_kwargs)
+    self.sync_sample_tables()
read_genotypes(bed_paths, keep_samples=None, keep_file=None, extract_snps=None, extract_file=None, min_maf=None, min_mac=1, drop_duplicated=True)

Read the genotype matrix and/or associated metadata from plink's BED file format. Consult the documentation for GenotypeMatrix for more details.

Parameters:

- bed_paths: The path to the BED file(s). You may use a wildcard here to read files for multiple chromosomes. (required)
- keep_samples: A vector or list of sample IDs to keep when filtering the genotype matrix. (default: None)
- keep_file: A path to a plink-style file containing sample IDs to keep. (default: None)
- extract_snps: A vector or list of SNP IDs to keep when filtering the genotype matrix. (default: None)
- extract_file: A path to a plink-style file containing SNP IDs to keep. (default: None)
- min_maf: The minimum minor allele frequency cutoff. (default: None)
- min_mac: The minimum minor allele count cutoff. (default: 1)
- drop_duplicated: If True, drop SNPs with duplicated rsID. (default: True)

Source code in magenpy/GWADataLoader.py:
def read_genotypes(self,
+                   bed_paths,
+                   keep_samples=None,
+                   keep_file=None,
+                   extract_snps=None,
+                   extract_file=None,
+                   min_maf=None,
+                   min_mac=1,
+                   drop_duplicated=True):
+    """
+    Read the genotype matrix and/or associated metadata from plink's BED file format.
+    Consult the documentation for `GenotypeMatrix` for more details.
+
+    :param bed_paths: The path to the BED file(s). You may use a wildcard here to read files for multiple
+    chromosomes.
+    :param keep_samples: A vector or list of sample IDs to keep when filtering the genotype matrix.
+    :param keep_file: A path to a plink-style file containing sample IDs to keep.
+    :param extract_snps: A vector or list of SNP IDs to keep when filtering the genotype matrix.
+    :param extract_file: A path to a plink-style file containing SNP IDs to keep.
+    :param min_maf: The minimum minor allele frequency cutoff.
+    :param min_mac: The minimum minor allele count cutoff.
+    :param drop_duplicated: If True, drop SNPs with duplicated rsID.
+    """
+
+    if bed_paths is None:
+        return
+
+    # Find all the relevant files in the path passed by the user:
+    if not iterable(bed_paths):
+        bed_files = get_filenames(bed_paths, extension='.bed')
+    else:
+        bed_files = bed_paths
+
+    if len(bed_files) < 1:
+        warnings.warn(f"No BED files were found at: {bed_paths}")
+        return
+
+    # Depending on the backend, select the `GenotypeMatrix` class:
+    if self.backend == 'xarray':
+        gmat_class = xarrayGenotypeMatrix
+    else:
+        gmat_class = plinkBEDGenotypeMatrix
+
+    if self.verbose and len(bed_files) < 2:
+        print("> Reading BED file...")
+
+    self.genotype = {}
+
+    for bfile in tqdm(bed_files,
+                      total=len(bed_files),
+                      desc="Reading BED files",
+                      disable=not self.verbose or len(bed_files) < 2):
+        # Read BED file and update the genotypes dictionary:
+        self.genotype.update(gmat_class.from_file(bfile,
+                                                  temp_dir=self.temp_dir,
+                                                  threads=self.threads).split_by_chromosome())
+
+    # After reading the genotype matrices, apply some standard filters:
+    for i, (c, g) in enumerate(self.genotype.items()):
+
+        # Filter the genotype matrix to keep certain subsample:
+        if keep_samples or keep_file:
+            g.filter_samples(keep_samples=keep_samples, keep_file=keep_file)
+
+        # Filter the genotype matrix to keep certain SNPs
+        if extract_snps or extract_file:
+            g.filter_snps(extract_snps=extract_snps, extract_file=extract_file)
+
+        # Drop duplicated SNP IDs
+        if drop_duplicated:
+            g.drop_duplicated_snps()
+
+        # Filter SNPs by minor allele frequency and/or count:
+        g.filter_by_allele_frequency(min_maf=min_maf, min_mac=min_mac)
+
+        if i == 0:
+            self.sample_table = g.sample_table
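A usage sketch (the wildcard path is a placeholder):

    import magenpy as mgp

    gdl = mgp.GWADataLoader()  # assuming a bare constructor is valid
    # Read per-chromosome BED files and apply a MAF filter on the way in:
    gdl.read_genotypes("data/1000G_eur_chr*.bed", min_maf=0.01)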
read_ld(ld_store_paths)

Read the LD matrix files stored on-disk in Zarr array format.

Parameters:

- ld_store_paths: The path to the LD matrices. This may be a wildcard to accommodate reading data for multiple chromosomes. (required)

Source code in magenpy/GWADataLoader.py:
def read_ld(self, ld_store_paths):
+    """
+    Read the LD matrix files stored on-disk in Zarr array format.
+    :param ld_store_paths: The path to the LD matrices. This may be a wildcard to accommodate reading data
+    for multiple chromosomes.
+    """
+
+    if ld_store_paths is None:
+        return
+
+    if not iterable(ld_store_paths):
+        ld_store_files = get_filenames(ld_store_paths, extension='.zgroup')
+    else:
+        ld_store_files = ld_store_paths
+
+    if len(ld_store_files) < 1:
+        warnings.warn(f"No LD matrix files were found at: {ld_store_paths}")
+        return
+
+    if self.verbose and len(ld_store_files) < 2:
+        print("> Reading LD matrix...")
+
+    self.ld = {}
+
+    for f in tqdm(ld_store_files,
+                  total=len(ld_store_files),
+                  desc="Reading LD matrices",
+                  disable=not self.verbose or len(ld_store_files) < 2):
+        z = LDMatrix.from_path(f)
+        self.ld[z.chromosome] = z
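For example (a placeholder wildcard path to Zarr stores produced by compute_ld):

    gdl.read_ld("output/ld/chr_*")
    gdl.load_ld()  # optionally bring the matrices into memory for faster access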
read_phenotype(phenotype_file, drop_na=True, **read_csv_kwargs)

Read the phenotype file and integrate it with the sample tables and genotype matrices.

Parameters:

- phenotype_file: The path to the phenotype file (default: a tab-separated file with FID IID phenotype columns). If different, supply details as additional arguments to this function. (required)
- drop_na: Drop samples with missing phenotype information. (default: True)
- read_csv_kwargs: Keyword arguments for the read_csv function of pandas. (default: {})

Source code in magenpy/GWADataLoader.py:
def read_phenotype(self, phenotype_file, drop_na=True, **read_csv_kwargs):
+    """
+    Read the phenotype file and integrate it with the sample tables and genotype matrices.
+
+    :param phenotype_file: The path to the phenotype file
+    (Default: tab-separated file with `FID IID phenotype` columns). If different, supply
+    details as additional arguments to this function.
+    :param drop_na: Drop samples with missing phenotype information.
+    :param read_csv_kwargs: keyword arguments for the `read_csv` function of `pandas`.
+    """
+
+    if phenotype_file is None:
+        return
+
+    if self.verbose:
+        print("> Reading phenotype file...")
+
+    assert self.sample_table is not None
+
+    self.sample_table.read_phenotype_file(phenotype_file, drop_na=drop_na, **read_csv_kwargs)
+    self.sync_sample_tables()
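For instance (placeholder path; by default the file is expected to contain FID IID phenotype columns):

    gdl.read_phenotype("data/height.txt")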
read_summary_statistics(sumstats_path, sumstats_format='magenpy', parser=None, drop_duplicated=True, **parse_kwargs)

Read GWAS summary statistics file(s) and parse them to SumstatsTable objects.

Parameters:

- sumstats_path: The path to the summary statistics file(s). The path may be a wildcard. (required)
- sumstats_format: The format for the summary statistics. Currently supports the following formats: plink1.9, plink2, magenpy, fastGWA, COJO, SAIGE, or GWASCatalog for the standard summary statistics format (also known as ssf or gwas-ssf). (default: 'magenpy')
- parser: If the summary statistics file does not follow any of the formats above, you can create your own parser by inheriting from the base SumstatsParser class and passing it here as an argument. (default: None)
- drop_duplicated: Drop SNPs with duplicated rsIDs. (default: True)
- parse_kwargs: Keyword arguments for the parser. These are mainly parameters that will be passed to the pandas.read_csv function, such as the delimiter, header information, etc. (default: {})

Source code in magenpy/GWADataLoader.py:
def read_summary_statistics(self,
+                            sumstats_path,
+                            sumstats_format='magenpy',
+                            parser=None,
+                            drop_duplicated=True,
+                            **parse_kwargs):
+    """
+    Read GWAS summary statistics file(s) and parse them to `SumstatsTable` objects.
+
+    :param sumstats_path: The path to the summary statistics file(s). The path may be a wildcard.
+    :param sumstats_format: The format for the summary statistics. Currently supports the following
+     formats: `plink1.9`, `plink2`, `magenpy`, `fastGWA`, `COJO`, `SAIGE`, or `GWASCatalog` for the standard
+     summary statistics format (also known as `ssf` or `gwas-ssf`).
+    :param parser: If the summary statistics file does not follow any of the formats above, you can create
+    your own parser by inheriting from the base `SumstatsParser` class and passing it here as an argument.
+    :param drop_duplicated: Drop SNPs with duplicated rsIDs.
+    :param parse_kwargs: keyword arguments for the parser. These are mainly parameters that will be passed to
+    `pandas.read_csv` function, such as the delimiter, header information, etc.
+    """
+
+    if sumstats_path is None:
+        return
+
+    if not iterable(sumstats_path):
+        sumstats_files = get_filenames(sumstats_path)
+
+        from .utils.system_utils import valid_url
+        if len(sumstats_files) < 1 and valid_url(sumstats_path):
+            sumstats_files = [sumstats_path]
+    else:
+        sumstats_files = sumstats_path
+
+    if len(sumstats_files) < 1:
+        warnings.warn(f"No summary statistics files were found at: {sumstats_path}")
+        return
+
+    if self.verbose and len(sumstats_files) < 2:
+        print("> Reading summary statistics file...")
+
+    self.sumstats_table = {}
+
+    for f in tqdm(sumstats_files,
+                  total=len(sumstats_files),
+                  desc="Reading summary statistics files",
+                  disable=not self.verbose or len(sumstats_files) < 2):
+
+        ss_tab = SumstatsTable.from_file(f, sumstats_format=sumstats_format, parser=parser, **parse_kwargs)
+
+        if drop_duplicated:
+            ss_tab.drop_duplicates()
+
+        if 'CHR' in ss_tab.table.columns:
+            self.sumstats_table.update(ss_tab.split_by_chromosome())
+        else:
+            if self.genotype is not None:
+                ref_table = {c: g.snps for c, g in self.genotype.items()}
+            elif self.ld is not None:
+                ref_table = {c: ld.snps for c, ld in self.ld.items()}
+            else:
+                raise ValueError("Cannot index summary statistics tables without chromosome information!")
+
+            self.sumstats_table.update(ss_tab.split_by_chromosome(snps_per_chrom=ref_table))
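A sketch with a fastGWA-formatted file (the path is a placeholder):

    gdl.read_summary_statistics("data/height_sumstats.fastGWA",
                                sumstats_format='fastGWA')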
release_ld()

A utility function to release the LD matrices from memory.

Source code in magenpy/GWADataLoader.py:
def release_ld(self):
+    """
+    A utility function to release the LD matrices from memory.
+    """
+    if self.ld is not None:
+        for ld in self.ld.values():
+            ld.release()
score(beta=None, standardize_genotype=False)

Perform linear scoring, i.e. multiply the genotype matrix by the vector of effect sizes, beta.

Parameters:

- beta: A dictionary where the keys are the chromosome numbers and the values are a vector of effect sizes for each variant on that chromosome. If the betas are not provided, we use the marginal betas by default (if those are available). (default: None)
- standardize_genotype: If True, standardize the genotype matrix before scoring. (default: False)

Source code in magenpy/GWADataLoader.py:
def score(self, beta=None, standardize_genotype=False):
+    """
+    Perform linear scoring, i.e. multiply the genotype matrix by the vector of effect sizes, `beta`.
+
+    :param beta: A dictionary where the keys are the chromosome numbers and the
+    values are a vector of effect sizes for each variant on that chromosome. If the
+    betas are not provided, we use the marginal betas by default (if those are available).
+    :param standardize_genotype: If True, standardize the genotype matrix before scoring.
+    """
+
+    if beta is None:
+        try:
+            beta = {c: s.marginal_beta or s.get_snp_pseudo_corr() for c, s in self.sumstats_table.items()}
+        except Exception:
+            raise ValueError("To perform linear scoring, you must provide effect size estimates (BETA)!")
+
+    # Here, we have a very ugly way of accounting for
+    # the fact that the chromosomes may be coded differently between the genotype
+    # and the beta dictionary. Maybe we can find a better solution in the future.
+    common_chr_g, common_chr_b = match_chromosomes(self.genotype.keys(), beta.keys(), return_both=True)
+
+    if len(common_chr_g) < 1:
+        raise ValueError("No common chromosomes found between the genotype and the effect size estimates!")
+
+    if self.verbose and len(common_chr_g) < 2:
+        print("> Generating polygenic scores...")
+
+    pgs = None
+
+    for c_g, c_b in tqdm(zip(common_chr_g, common_chr_b),
+                         total=len(common_chr_g),
+                         desc='Generating polygenic scores',
+                         disable=not self.verbose or len(common_chr_g) < 2):
+
+        if pgs is None:
+            pgs = self.genotype[c_g].score(beta[c_b], standardize_genotype=standardize_genotype)
+        else:
+            pgs += self.genotype[c_g].score(beta[c_b], standardize_genotype=standardize_genotype)
+
+    # If we only have a single set of betas, flatten the PGS vector:
+    if len(pgs.shape) > 1 and pgs.shape[1] == 1:
+        pgs = pgs.flatten()
+
+    return pgs
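For example (beta_chr22 is a hypothetical effect size vector with one entry per variant on chromosome 22):

    # Score with externally estimated effect sizes:
    pgs = gdl.score(beta={22: beta_chr22})
    # Or fall back on marginal betas from the attached summary statistics:
    pgs = gdl.score()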
set_phenotype(new_phenotype, phenotype_likelihood=None)

A convenience method to update the phenotype column for the samples.

Parameters:

- new_phenotype: A vector or list of phenotype values. (required)
- phenotype_likelihood: The phenotype likelihood (e.g. binomial, gaussian). Optional. (default: None)

Source code in magenpy/GWADataLoader.py:
def set_phenotype(self, new_phenotype, phenotype_likelihood=None):
+    """
+    A convenience method to update the phenotype column for the samples.
+    :param new_phenotype: A vector or list of phenotype values.
+    :param phenotype_likelihood: The phenotype likelihood (e.g. `binomial`, `gaussian`). Optional.
+    """
+
+    self.sample_table.set_phenotype(new_phenotype,
+                                    phenotype_likelihood=phenotype_likelihood or self.phenotype_likelihood)
+    self.sync_sample_tables()
split_by_chromosome()

A utility method to split a GWADataLoader object by chromosome ID, such that we would have one GWADataLoader object per chromosome. The method returns a dictionary where the key is the chromosome number and the value is the GWADataLoader object corresponding to that chromosome only.

Source code in magenpy/GWADataLoader.py:
def split_by_chromosome(self):
+    """
+    A utility method to split a GWADataLoader object by chromosome ID, such that
+    we would have one `GWADataLoader` object per chromosome. The method returns a dictionary
+    where the key is the chromosome number and the value is the `GWADataLoader` object corresponding
+    to that chromosome only.
+    """
+
+    if len(self.chromosomes) == 1:
+        return {self.chromosomes[0]: self}
+
+    else:
+        split_dict = {}
+
+        for c in self.chromosomes:
+            split_dict[c] = copy.copy(self)
+
+            if self.genotype is not None and c in self.genotype:
+                split_dict[c].genotype = {c: self.genotype[c]}
+            if self.sumstats_table is not None and c in self.sumstats_table:
+                split_dict[c].sumstats_table = {c: self.sumstats_table[c]}
+            if self.ld is not None and c in self.ld:
+                split_dict[c].ld = {c: self.ld[c]}
+            if self.annotation is not None and c in self.annotation:
+                split_dict[c].annotation = {c: self.annotation[c]}
+
+        return split_dict
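For example:

    per_chrom = gdl.split_by_chromosome()
    gdl_chr22 = per_chrom[22]  # assuming chromosome 22 is among the data sources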
split_by_samples(proportions=None, groups=None, keep_original=True)

Split the GWADataLoader object by samples, if genotype or sample data is available. The user must provide a list or proportion of samples in each split, and the method will return a list of GWADataLoader objects with only the samples designated for each split. This may be a useful utility for training/testing splits or other downstream tasks.

Parameters:

- proportions: A list with the proportion of samples in each split. Must sum to 1. (default: None)
- groups: A list of lists containing the sample IDs in each split. (default: None)
- keep_original: If True, keep the original GWADataLoader object and do not transform it in the splitting process. (default: True)

Source code in magenpy/GWADataLoader.py:
def split_by_samples(self, proportions=None, groups=None, keep_original=True):
+    """
+    Split the `GWADataLoader` object by samples, if genotype or sample data
+    is available. The user must provide a list or proportion of samples in each split,
+    and the method will return a list of `GWADataLoader` objects with only the samples
+    designated for each split. This may be a useful utility for training/testing split or some
+    other downstream tasks.
+
+    :param proportions: A list with the proportion of samples in each split. Must add to 1.
+    :param groups: A list of lists containing the sample IDs in each split.
+    :param keep_original: If True, keep the original `GWADataLoader` object and do not
+    transform it in the splitting process.
+    """
+
+    if self.sample_table is None:
+        raise ValueError("The sample table is not set!")
+
+    if groups is None:
+        if proportions is None:
+            raise ValueError("To split a `GWADataLoader` object by samples, the user must provide either the list "
+                             "or proportion of individuals in each split.")
+        else:
+
+            # Assign each sample to a different split randomly by drawing from a multinomial:
+            random_split = np.random.multinomial(1, proportions, size=self.sample_size).astype(bool)
+            # Extract the individuals in each group from the multinomial sample:
+            groups = [self.samples[random_split[:, i]] for i in range(random_split.shape[1])]
+
+    gdls = []
+    for i, g in enumerate(groups):
+
+        if len(g) < 1:
+            raise ValueError(f"Group {i} is empty! Please ensure that all splits have at least one sample.")
+
+        if (i + 1) == len(groups) and not keep_original:
+            new_gdl = self
+        else:
+            new_gdl = copy.deepcopy(self)
+
+        new_gdl.filter_samples(keep_samples=g)
+
+        gdls.append(new_gdl)
+
+    return gdls
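For example, an 80/20 train/test split (the proportions must sum to 1):

    train_gdl, test_gdl = gdl.split_by_samples(proportions=[0.8, 0.2])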
sync_sample_tables()

A utility method to sync the sample tables of the GenotypeMatrix objects with the sample table under the GWADataLoader object. This is especially important when setting new phenotypes (from the simulators) or reading covariates files, etc.

Source code in magenpy/GWADataLoader.py:
def sync_sample_tables(self):
+    """
+    A utility method to sync the sample tables of the
+    `GenotypeMatrix` objects with the sample table under
+    the `GWADataLoader` object. This is especially important
+    when setting new phenotypes (from the simulators) or reading
+    covariates files, etc.
+    """
+
+    for c, g in self.genotype.items():
+        g.set_sample_table(self.sample_table)
to_individual_table()

Returns:

- A plink-style dataframe of individual IDs, in the form of Family ID (FID) and Individual ID (IID).

Source code in magenpy/GWADataLoader.py:
def to_individual_table(self):
+    """
+    :return: A plink-style dataframe of individual IDs, in the form of
+    Family ID (FID) and Individual ID (IID).
+    """
+
+    return self.sample_table.get_individual_table()
to_phenotype_table()

Returns:

- A plink-style dataframe with each individual's Family ID (FID), Individual ID (IID), and phenotype value.

Source code in magenpy/GWADataLoader.py:
def to_phenotype_table(self):
+    """
+    :return: A plink-style dataframe with each individual's Family ID (FID),
+    Individual ID (IID), and phenotype value.
+    """
+
+    return self.sample_table.get_phenotype_table()
to_snp_table(col_subset=None, per_chromosome=False)

Get a dataframe of SNP data for all variants across different chromosomes.

Parameters:

- col_subset: The subset of columns to obtain. (default: None)
- per_chromosome: If True, returns a dictionary where the key is the chromosome number and the value is the SNP table per chromosome. (default: False)

Returns:

- A dataframe (or dictionary of dataframes) of SNP data.

Source code in magenpy/GWADataLoader.py:
def to_snp_table(self, col_subset=None, per_chromosome=False):
+    """
+    Get a dataframe of SNP data for all variants
+    across different chromosomes.
+
+    :param col_subset: The subset of columns to obtain.
+    :param per_chromosome: If True, returns a dictionary where the key
+    is the chromosome number and the value is the SNP table per
+    chromosome.
+
+    :return: A dataframe (or dictionary of dataframes) of SNP data.
+    """
+
+    snp_tables = {}
+
+    for c in self.chromosomes:
+        if self.sumstats_table is not None:
+            snp_tables[c] = self.sumstats_table[c].to_table(col_subset=col_subset)
+        elif self.genotype is not None:
+            snp_tables[c] = self.genotype[c].get_snp_table(col_subset=col_subset)
+        elif self.ld is not None:
+            snp_tables[c] = self.ld[c].to_snp_table(col_subset=col_subset)
+        else:
+            raise ValueError("GWADataLoader instance is not properly initialized!")
+
+    if per_chromosome:
+        return snp_tables
+    else:
+        return pd.concat(list(snp_tables.values()))
to_summary_statistics_table(col_subset=None, per_chromosome=False)

Get a dataframe of the GWAS summary statistics for all variants across different chromosomes.

Parameters:

- col_subset: The subset of columns (or summary statistics) to obtain. (default: None)
- per_chromosome: If True, returns a dictionary where the key is the chromosome number and the value is the summary statistics table per chromosome. (default: False)

Returns:

- A dataframe (or dictionary of dataframes) of summary statistics.

Source code in magenpy/GWADataLoader.py:
def to_summary_statistics_table(self, col_subset=None, per_chromosome=False):
+    """
+    Get a dataframe of the GWAS summary statistics for all variants
+    across different chromosomes.
+
+    :param col_subset: The subset of columns (or summary statistics) to obtain.
+    :param per_chromosome: If True, returns a dictionary where the key
+    is the chromosome number and the value is the summary statistics table per
+    chromosome.
+
+    :return: A dataframe (or dictionary of dataframes) of summary statistics.
+    """
+
+    assert self.sumstats_table is not None
+
+    snp_tables = {}
+
+    for c in self.chromosomes:
+        snp_tables[c] = self.sumstats_table[c].to_table(col_subset=col_subset)
+
+    if per_chromosome:
+        return snp_tables
+    else:
+        return pd.concat(list(snp_tables.values()))
\ No newline at end of file
diff --git a/api/GenotypeMatrix/index.html b/api/GenotypeMatrix/index.html
new file mode 100644
index 0000000..653be9e
--- /dev/null
+++ b/api/GenotypeMatrix/index.html
@@ -0,0 +1,7845 @@

GenotypeMatrix - magenpy

GenotypeMatrix

GenotypeMatrix

Bases: object

A class to represent a genotype matrix. The genotype matrix is a matrix where the rows represent samples and the columns represent genetic variants. In general, genotype matrices are assumed to reside on disk and this class provides a convenient interface to interact with and perform computations on the genotype matrix.

Currently, we assume that the genotype matrix is stored using plink's BED file format, with associated tables for the samples (i.e. FAM file) and genetic variants (i.e. BIM file). Classes that inherit from this generic class support various backends to access and perform computations on this genotype data.

See Also

* [xarrayGenotypeMatrix][magenpy.GenotypeMatrix.xarrayGenotypeMatrix]
* [plinkBEDGenotypeMatrix][magenpy.GenotypeMatrix.plinkBEDGenotypeMatrix]

Attributes:

- sample_table (Union[DataFrame, SampleTable, None]): A table containing information about the samples in the genotype matrix (initially read from the FAM file).
- snp_table (Union[DataFrame, None]): A table containing information about the genetic variants in the genotype matrix (initially read from the BIM file).
- bed_file: The path to the plink BED file containing the genotype matrix.
- _genome_build: The genome build or assembly under which the SNP coordinates are defined.
- temp_dir: The directory where temporary files will be stored (if needed).
- cleanup_dir_list: A list of directories to clean up after execution.
- threads: The number of threads to use for parallel computations.

Source code in magenpy/GenotypeMatrix.py:
class GenotypeMatrix(object):
+    """
+    A class to represent a genotype matrix. The genotype matrix is a matrix
+    where the rows represent samples and the columns represent genetic variants.
+    In general, genotype matrices are assumed to reside on disk and this class
+    provides a convenient interface to interact with and perform computations
+    on the genotype matrix.
+
+    Currently, we assume that the genotype matrix is stored using plink's BED
+    file format, with associated tables for the samples (i.e. FAM file) and genetic
+    variants (i.e. BIM file). Classes that inherit from this generic class support
+    various backends to access and perform computations on this genotype data.
+
+    !!! seealso "See Also"
+            * [xarrayGenotypeMatrix][magenpy.GenotypeMatrix.xarrayGenotypeMatrix]
+            * [plinkBEDGenotypeMatrix][magenpy.GenotypeMatrix.plinkBEDGenotypeMatrix]
+
+    :ivar sample_table: A table containing information about the samples in the genotype matrix
+    (initially read from the FAM file).
+    :ivar snp_table: A table containing information about the genetic variants in the genotype matrix
+    (initially read from the BIM file).
+    :ivar bed_file: The path to the plink BED file containing the genotype matrix.
+    :ivar _genome_build: The genome build or assembly under which the SNP coordinates are defined.
+    :ivar temp_dir: The directory where temporary files will be stored (if needed).
+    :ivar cleanup_dir_list: A list of directories to clean up after execution.
+    :ivar threads: The number of threads to use for parallel computations.
+
+    """
+
+    def __init__(self,
+                 sample_table: Union[pd.DataFrame, SampleTable, None] = None,
+                 snp_table: Union[pd.DataFrame, None] = None,
+                 temp_dir: str = 'temp',
+                 bed_file: str = None,
+                 genome_build=None,
+                 threads=1,
+                 **kwargs):
+        """
+        Initialize a GenotypeMatrix object.
+
+        :param sample_table: A table containing information about the samples in the genotype matrix.
+        :param snp_table: A table containing information about the genetic variants in the genotype matrix.
+        :param temp_dir: The directory where temporary files will be stored (if needed).
+        :param bed_file: The path to the plink BED file containing the genotype matrix.
+        :param genome_build: The genome build or assembly under which the SNP coordinates are defined.
+        :param threads: The number of threads to use for parallel computations.
+        :param kwargs: Additional keyword arguments.
+        """
+
+        self.sample_table: Union[pd.DataFrame, SampleTable, None] = None
+        self.snp_table: Union[pd.DataFrame, None] = snp_table
+
+        if sample_table is not None:
+            self.set_sample_table(sample_table)
+
+        if snp_table is not None:
+            self.snp_table['original_index'] = np.arange(len(self.snp_table))
+
+        from .utils.system_utils import makedir
+
+        makedir(temp_dir)
+
+        self.bed_file = bed_file
+        self._genome_build = genome_build
+        self.temp_dir = temp_dir
+        self.cleanup_dir_list = []  # Directories to clean up after execution.
+
+        self.threads = threads
+
+    @classmethod
+    def from_file(cls, file_path, temp_dir='temp', **kwargs):
+        """
+        Initialize a genotype matrix object by passing a file path + other keyword arguments.
+        :param file_path: The path to the plink BED file.
+        :type file_path: str
+        :param temp_dir: The directory where temporary files will be stored.
+        :type temp_dir: str
+        :param kwargs: Additional keyword arguments.
+        """
+        raise NotImplementedError
+
+    @property
+    def shape(self):
+        """
+        :return: The shape of the genotype matrix. Rows correspond to the
+        number of samples and columns to the number of SNPs.
+        """
+        return self.n, self.m
+
+    @property
+    def n(self):
+        """
+        !!! seealso "See Also"
+            * [sample_size][magenpy.GenotypeMatrix.GenotypeMatrix.sample_size]
+
+        :return: The sample size or number of individuals in the genotype matrix.
+        """
+        return self.sample_table.n
+
+    @property
+    def sample_size(self):
+        """
+        !!! seealso "See Also"
+            * [n][magenpy.GenotypeMatrix.GenotypeMatrix.n]
+
+        :return: The sample size or number of individuals in the genotype matrix.
+        """
+        return self.n
+
+    @property
+    def samples(self):
+        """
+        :return: An array of sample IDs in the genotype matrix.
+        """
+        return self.sample_table.iid
+
+    @property
+    def m(self):
+        """
+
+        !!! seealso "See Also"
+            * [n_snps][magenpy.GenotypeMatrix.GenotypeMatrix.n_snps]
+
+        :return: The number of variants in the genotype matrix.
+        """
+        if self.snp_table is not None:
+            return len(self.snp_table)
+
+    @property
+    def n_snps(self):
+        """
+        !!! seealso "See Also"
+            * [m][magenpy.GenotypeMatrix.GenotypeMatrix.m]
+
+        :return: The number of variants in the genotype matrix.
+        """
+        return self.m
+
+    @property
+    def genome_build(self):
+        """
+        :return: The genome build or assembly under which the SNP coordinates are defined.
+        """
+        return self._genome_build
+
+    @property
+    def chromosome(self):
+        """
+        :return: The chromosome associated with the variants in the genotype matrix.
+
+        !!! note
+            This is a convenience method that assumes that the genotype matrix contains variants
+            from a single chromosome. If there are multiple chromosomes, the method will return `None`.
+
+        """
+        chrom = self.chromosomes
+        if chrom is not None and len(chrom) == 1:
+            return chrom[0]
+
+    @property
+    def chromosomes(self):
+        """
+        :return: The unique set of chromosomes comprising the genotype matrix.
+        """
+        chrom = self.get_snp_attribute('CHR')
+        if chrom is not None:
+            return np.unique(chrom)
+
+    @property
+    def snps(self):
+        """
+        :return: The SNP rsIDs for variants in the genotype matrix.
+        """
+        return self.get_snp_attribute('SNP')
+
+    @property
+    def bp_pos(self):
+        """
+        :return: The basepair position for the genetic variants in the genotype matrix.
+        """
+        return self.get_snp_attribute('POS')
+
+    @property
+    def cm_pos(self):
+        """
+        :return: The position of genetic variants in the genotype matrix in units of Centi Morgan.
+        :raises KeyError: If the genetic distance is not set in the genotype file.
+        """
+        cm = self.get_snp_attribute('cM')
+        if cm is None or len(set(cm)) == 1:
+            raise KeyError("Genetic distance in centi Morgan (cM) is not "
+                           "set in the genotype file!")
+        return cm
+
+    @property
+    def a1(self):
+        """
+        !!! seealso "See Also"
+            * [alt_allele][magenpy.GenotypeMatrix.GenotypeMatrix.alt_allele]
+            * [effect_allele][magenpy.GenotypeMatrix.GenotypeMatrix.effect_allele]
+
+        :return: The effect allele `A1` for each genetic variant.
+
+        """
+        return self.get_snp_attribute('A1')
+
+    @property
+    def a2(self):
+        """
+
+        !!! seealso "See Also"
+            * [ref_allele][magenpy.GenotypeMatrix.GenotypeMatrix.ref_allele]
+
+        :return: The reference allele `A2` for each genetic variant.
+
+        """
+        return self.get_snp_attribute('A2')
+
+    @property
+    def ref_allele(self):
+        """
+
+        !!! seealso "See Also"
+            * [a2][magenpy.GenotypeMatrix.GenotypeMatrix.a2]
+
+        :return: The reference allele `A2` for each genetic variant.
+        """
+        return self.a2
+
+    @property
+    def alt_allele(self):
+        """
+        !!! seealso "See Also"
+            * [effect_allele][magenpy.GenotypeMatrix.GenotypeMatrix.effect_allele]
+            * [a1][magenpy.GenotypeMatrix.GenotypeMatrix.a1]
+
+        :return: The effect allele `A1` for each genetic variant.
+
+        """
+        return self.a1
+
+    @property
+    def effect_allele(self):
+        """
+
+        !!! seealso "See Also"
+            * [alt_allele][magenpy.GenotypeMatrix.GenotypeMatrix.alt_allele]
+            * [a1][magenpy.GenotypeMatrix.GenotypeMatrix.a1]
+
+        :return: The effect allele `A1` for each genetic variant.
+
+        """
+        return self.a1
+
+    @property
+    def n_per_snp(self):
+        """
+        :return: Sample size per genetic variant (accounting for potential missing values).
+        """
+        n = self.get_snp_attribute('N')
+        if n is not None:
+            return n
+        else:
+            self.compute_sample_size_per_snp()
+            return self.get_snp_attribute('N')
+
+    @property
+    def maf(self):
+        """
+        :return: The minor allele frequency (MAF) of each variant in the genotype matrix.
+        """
+        maf = self.get_snp_attribute('MAF')
+        if maf is not None:
+            return maf
+        else:
+            self.compute_allele_frequency()
+            return self.get_snp_attribute('MAF')
+
+    @property
+    def maf_var(self):
+        """
+        :return: The variance in minor allele frequency (MAF) of each variant in the genotype matrix.
+        """
+        return 2. * self.maf * (1. - self.maf)
+
+    def estimate_memory_allocation(self, dtype=np.float32):
+        """
+        :return: An estimate of the memory allocation for the genotype matrix in megabytes.
+        """
+        return self.n * self.m * np.dtype(dtype).itemsize / 1024 ** 2
+
+    def get_snp_table(self, col_subset=None):
+        """
+        A convenience method to extract SNP-related information from the genotype matrix.
+        :param col_subset: A list of columns to extract from the SNP table.
+
+        :return: A `pandas` DataFrame with the requested columns.
+        """
+
+        if col_subset is None:
+            return self.snp_table.copy()
+        else:
+            present_cols = list(set(col_subset).intersection(set(self.snp_table.columns)))
+            non_present_cols = list(set(col_subset) - set(present_cols))
+
+            if len(present_cols) > 0:
+                table = self.snp_table[present_cols].copy()
+            else:
+                table = pd.DataFrame({c: [] for c in non_present_cols})
+
+            for col in non_present_cols:
+
+                if col == 'MAF':
+                    table['MAF'] = self.maf
+                elif col == 'MAF_VAR':
+                    table['MAF_VAR'] = self.maf_var
+                elif col == 'N':
+                    table['N'] = self.n_per_snp
+                else:
+                    raise KeyError(f"Column '{col}' is not available in the SNP table!")
+
+            return table[list(col_subset)]
+
+    def get_snp_attribute(self, attr):
+        """
+
+        :param attr: The name of the attribute to extract from the SNP table.
+        :return: The values of a specific attribute for each variant in the genotype matrix.
+        """
+        if self.snp_table is not None and attr in self.snp_table.columns:
+            return self.snp_table[attr].values
+
+    def compute_ld(self,
+                   estimator,
+                   output_dir,
+                   dtype='int16',
+                   compressor_name='lz4',
+                   compression_level=5,
+                   **ld_kwargs):
+        """
+
+        Compute the Linkage-Disequilibrium (LD) or SNP-by-SNP correlation matrix
+        for the variants defined in the genotype matrix.
+
+        :param estimator: The estimator for the LD matrix. We currently support
+        4 different estimators: `sample`, `windowed`, `shrinkage`, and `block`.
+        :param output_dir: The output directory where the Zarr array containing the
+        entries of the LD matrix will be stored.
+        :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+        and integer quantized data types int8 and int16).
+        :param compressor_name: The name of the compressor to use for the Zarr array.
+        :param compression_level: The compression level for the Zarr array (1-9).
+        :param ld_kwargs: keyword arguments for the various LD estimators. Consult
+        the implementations of `WindowedLD`, `ShrinkageLD`, and `BlockLD` for details.
+        """
+
+        from .stats.ld.estimator import SampleLD, WindowedLD, ShrinkageLD, BlockLD
+
+        if estimator == 'sample':
+            ld_est = SampleLD(self)
+        elif estimator == 'windowed':
+            ld_est = WindowedLD(self, **ld_kwargs)
+        elif estimator == 'shrinkage':
+            ld_est = ShrinkageLD(self, **ld_kwargs)
+        elif estimator == 'block':
+            ld_est = BlockLD(self, **ld_kwargs)
+        else:
+            raise KeyError(f"LD estimator {estimator} is not recognized!")
+
+        # Create a temporary directory where we store intermediate results:
+        tmp_ld_dir = tempfile.TemporaryDirectory(dir=self.temp_dir, prefix='ld_')
+        self.cleanup_dir_list.append(tmp_ld_dir)
+
+        return ld_est.compute(output_dir,
+                              temp_dir=tmp_ld_dir.name,
+                              dtype=dtype,
+                              compressor_name=compressor_name,
+                              compression_level=compression_level)
+
+    def set_sample_table(self, sample_table):
+        """
+        A convenience method to set the sample table for the genotype matrix.
+        This may be useful for syncing sample tables across `GenotypeMatrix` objects
+        corresponding to different chromosomes or genomic regions.
+
+        :param sample_table: An instance of SampleTable or a pandas dataframe containing
+        information about the samples in the genotype matrix.
+
+        """
+
+        if isinstance(sample_table, SampleTable):
+            self.sample_table = sample_table
+        elif isinstance(sample_table, pd.DataFrame):
+            self.sample_table = SampleTable(sample_table)
+        else:
+            raise ValueError("The sample table is invalid! "
+                             "Has to be either an instance of "
+                             "SampleTable or pandas DataFrame.")
+
+    def filter_snps(self, extract_snps=None, extract_file=None):
+        """
+        Filter variants from the genotype matrix. User must specify
+        either a list of variants to extract or the path to a plink-style file
+        with the list of variants to extract.
+
+        :param extract_snps: A list (or array) of SNP IDs to keep in the genotype matrix.
+        :param extract_file: The path to a file with the list of variants to extract.
+        """
+
+        assert extract_snps is not None or extract_file is not None
+
+        if extract_snps is None:
+            from .parsers.misc_parsers import read_snp_filter_file
+            extract_snps = read_snp_filter_file(extract_file)
+
+        self.snp_table = self.snp_table.merge(pd.DataFrame({'SNP': extract_snps}))
+
+    def filter_by_allele_frequency(self, min_maf=None, min_mac=1):
+        """
+        Filter variants by minimum minor allele frequency or allele count cutoffs.
+
+        :param min_maf: Minimum minor allele frequency
+        :param min_mac: Minimum minor allele count (1 by default)
+        """
+
+        if min_mac or min_maf:
+
+            maf = self.maf
+            n = self.n_per_snp
+
+            keep_flag = None
+
+            if min_mac:
+                mac = (2*maf*n).astype(np.int64)
+                keep_flag = (mac >= min_mac) & ((2*n - mac) >= min_mac)
+
+            if min_maf:
+
+                maf_cond = (maf >= min_maf) & (1. - maf >= min_maf)
+                if keep_flag is not None:
+                    keep_flag = keep_flag & maf_cond
+                else:
+                    keep_flag = maf_cond
+
+            if keep_flag is not None:
+                self.filter_snps(extract_snps=self.snps[keep_flag])
+
+    def drop_duplicated_snps(self):
+        """
+        A convenience method to drop variants with duplicated SNP rsIDs.
+        """
+
+        u_snps, counts = np.unique(self.snps, return_counts=True)
+        if len(u_snps) < self.n_snps:
+            # Keep only SNPs which occur once in the sequence:
+            self.filter_snps(u_snps[counts == 1])
+
+    def filter_samples(self, keep_samples=None, keep_file=None):
+        """
+        Filter samples from the genotype matrix. User must specify
+        either a list of samples to keep or the path to a plink-style file
+        with the list of samples to keep.
+
+        :param keep_samples: A list (or array) of sample IDs to keep in the genotype matrix.
+        :param keep_file: The path to a file with the list of samples to keep.
+        """
+
+        self.sample_table.filter_samples(keep_samples=keep_samples, keep_file=keep_file)
+
+    def score(self, beta, standardize_genotype=False):
+        """
+        Perform linear scoring, i.e. multiply the genotype matrix by the vector of effect sizes, `beta`.
+
+        :param beta: A vector of effect sizes for each variant in the genotype matrix.
+        :param standardize_genotype: If True, standardize the genotype matrix when computing the score.
+        """
+        raise NotImplementedError
+
+    def perform_gwas(self, **gwa_kwargs):
+        """
+        Perform genome-wide association testing of all variants against the phenotype.
+
+        :param gwa_kwargs: Keyword arguments to pass to the GWA functions. Consult `stats.gwa.utils`
+        for relevant keyword arguments for each backend.
+
+        :raises NotImplementedError: If the method is not implemented in the subclass.
+        """
+        raise NotImplementedError
+
+    def compute_allele_frequency(self):
+        """
+        Compute the allele frequency of each variant or SNP in the genotype matrix.
+
+        :raises NotImplementedError: If the method is not implemented in the subclass.
+        """
+        raise NotImplementedError
+
+    def compute_sample_size_per_snp(self):
+        """
+        Compute the sample size for each variant in the genotype matrix, accounting for
+        potential missing values.
+
+        :raises NotImplementedError: If the method is not implemented in the subclass.
+        """
+        raise NotImplementedError
+
+    def split_by_chromosome(self):
+        """
+        Split the genotype matrix by chromosome, so that we have a
+        separate `GenotypeMatrix` object for each chromosome.
+        This method returns a dictionary where the key is the chromosome number
+        and the value is an object of `GenotypeMatrix` for that chromosome.
+
+        :return: A dictionary of `GenotypeMatrix` objects, one for each chromosome.
+        """
+
+        chromosome = self.chromosome
+
+        if chromosome:
+            return {chromosome: self}
+        else:
+            chrom_tables = self.snp_table.groupby('CHR')
+            return {
+                c: self.__class__(sample_table=self.sample_table,
+                                  snp_table=chrom_tables.get_group(c),
+                                  temp_dir=self.temp_dir)
+                for c in chrom_tables.groups
+            }
+
+    def cleanup(self):
+        """
+        Clean up all temporary files and directories.
+        """
+
+        for tmp in self.cleanup_dir_list:
+            try:
+                tmp.cleanup()
+            except FileNotFoundError:
+                continue
+
+
+ Properties:
+
+ • a1 (property): The effect allele A1 for each genetic variant. See also: alt_allele, effect_allele.
+ • a2 (property): The reference allele A2 for each genetic variant. See also: ref_allele.
+ • alt_allele (property): The effect allele A1 for each genetic variant. See also: effect_allele, a1.
+ • bp_pos (property): The basepair position for the genetic variants in the genotype matrix.
+ • chromosome (property): The chromosome associated with the variants in the genotype matrix. Note: this is a convenience property that assumes the genotype matrix contains variants from a single chromosome; if there are multiple chromosomes, it returns None.
+ • chromosomes (property): The unique set of chromosomes comprising the genotype matrix.
+ • cm_pos (property): The position of genetic variants in the genotype matrix in units of centimorgan (cM). Raises KeyError if the genetic distance is not set in the genotype file.
+ • effect_allele (property): The effect allele A1 for each genetic variant. See also: alt_allele, a1.
+ • genome_build (property): The genome build or assembly under which the SNP coordinates are defined.
+ • m (property): The number of variants in the genotype matrix. See also: n_snps.
+ • maf (property): The minor allele frequency (MAF) of each variant in the genotype matrix.
+ • maf_var (property): The variance in minor allele frequency (MAF) of each variant in the genotype matrix.
+ • n (property): The sample size or number of individuals in the genotype matrix. See also: sample_size.
+ • n_per_snp (property): Sample size per genetic variant (accounting for potential missing values).
+ • n_snps (property): The number of variants in the genotype matrix. See also: m.
+ • ref_allele (property): The reference allele A2 for each genetic variant. See also: a2.
+ • sample_size (property): The sample size or number of individuals in the genotype matrix. See also: n.
+ • samples (property): An array of sample IDs in the genotype matrix.
+ • shape (property): The shape of the genotype matrix; rows correspond to the number of samples and columns to the number of SNPs.
+ • snps (property): The SNP rsIDs for variants in the genotype matrix.
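+ As a quick illustration of how these properties are typically accessed, here is a minimal sketch; the file path is a placeholder for your own PLINK file prefix:
+
+ from magenpy.GenotypeMatrix import xarrayGenotypeMatrix
+
+ g = xarrayGenotypeMatrix.from_file("genotype/chr22")  # hypothetical .bed/.bim/.fam prefix
+
+ print(g.shape)     # (number of samples, number of variants)
+ print(g.snps[:5])  # first five SNP rsIDs
+ print(g.maf[:5])   # minor allele frequencies, computed on demand if not already set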

+ __init__(sample_table=None, snp_table=None, temp_dir='temp', bed_file=None, genome_build=None, threads=1, **kwargs)
+
+ Initialize a GenotypeMatrix object.
+
+ Parameters:
+
+ • sample_table (Union[DataFrame, SampleTable, None], default None): A table containing information about the samples in the genotype matrix.
+ • snp_table (Union[DataFrame, None], default None): A table containing information about the genetic variants in the genotype matrix.
+ • temp_dir (str, default 'temp'): The directory where temporary files will be stored (if needed).
+ • bed_file (str, default None): The path to the plink BED file containing the genotype matrix.
+ • genome_build (default None): The genome build or assembly under which the SNP coordinates are defined.
+ • threads (default 1): The number of threads to use for parallel computations.
+ • kwargs (default {}): Additional keyword arguments.
+ Source code in magenpy/GenotypeMatrix.py +
def __init__(self,
+             sample_table: Union[pd.DataFrame, SampleTable, None] = None,
+             snp_table: Union[pd.DataFrame, None] = None,
+             temp_dir: str = 'temp',
+             bed_file: str = None,
+             genome_build=None,
+             threads=1,
+             **kwargs):
+    """
+    Initialize a GenotypeMatrix object.
+
+    :param sample_table: A table containing information about the samples in the genotype matrix.
+    :param snp_table: A table containing information about the genetic variants in the genotype matrix.
+    :param temp_dir: The directory where temporary files will be stored (if needed).
+    :param bed_file: The path to the plink BED file containing the genotype matrix.
+    :param genome_build: The genome build or assembly under which the SNP coordinates are defined.
+    :param threads: The number of threads to use for parallel computations.
+    :param kwargs: Additional keyword arguments.
+    """
+
+    self.sample_table: Union[pd.DataFrame, SampleTable, None] = None
+    self.snp_table: Union[pd.DataFrame, None] = snp_table
+
+    if sample_table is not None:
+        self.set_sample_table(sample_table)
+
+    if snp_table is not None:
+        self.snp_table['original_index'] = np.arange(len(self.snp_table))
+
+    from .utils.system_utils import makedir
+
+    makedir(temp_dir)
+
+    self.bed_file = bed_file
+    self._genome_build = genome_build
+    self.temp_dir = temp_dir
+    self.cleanup_dir_list = []  # Directories to clean up after execution.
+
+    self.threads = threads
+
+
+
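+ Before moving on to the methods below, a hedged sketch of constructing the object directly from a pandas DataFrame (the variant records are made up for illustration):
+
+ import pandas as pd
+ from magenpy.GenotypeMatrix import GenotypeMatrix
+
+ # Made-up variant records following the bim-style columns used in this module:
+ snp_df = pd.DataFrame({'CHR': [22, 22], 'SNP': ['rs0001', 'rs0002'],
+                        'cM': [0., 0.], 'POS': [16050075, 16050115],
+                        'A1': ['A', 'G'], 'A2': ['G', 'A']})
+
+ g_base = GenotypeMatrix(snp_table=snp_df, temp_dir='temp')
+ print(g_base.snps)  # array(['rs0001', 'rs0002'], dtype=object)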
+ cleanup()
+
+ Clean up all temporary files and directories.

+ compute_allele_frequency()
+
+ Compute the allele frequency of each variant or SNP in the genotype matrix. Raises NotImplementedError if the method is not implemented in the subclass.

+ compute_ld(estimator, output_dir, dtype='int16', compressor_name='lz4', compression_level=5, **ld_kwargs)
+
+ Compute the Linkage-Disequilibrium (LD) or SNP-by-SNP correlation matrix for the variants defined in the genotype matrix.
+
+ Parameters:
+
+ • estimator (required): The estimator for the LD matrix. We currently support 4 different estimators: sample, windowed, shrinkage, and block.
+ • output_dir (required): The output directory where the Zarr array containing the entries of the LD matrix will be stored.
+ • dtype (default 'int16'): The data type for the entries of the LD matrix (supported data types are float32, float64 and the integer quantized data types int8 and int16).
+ • compressor_name (default 'lz4'): The name of the compressor to use for the Zarr array.
+ • compression_level (default 5): The compression level for the Zarr array (1-9).
+ • ld_kwargs (default {}): Keyword arguments for the various LD estimators. Consult the implementations of WindowedLD, ShrinkageLD, and BlockLD for details.
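+ For instance, a minimal sketch of computing a sample LD matrix, reusing g from the earlier sketch (the output path is a placeholder and estimator-specific keyword arguments are omitted):
+
+ # Estimate the SNP-by-SNP correlation matrix and store it as a compressed Zarr array:
+ ld_mat = g.compute_ld('sample', output_dir='output/ld', dtype='int16')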

+ compute_sample_size_per_snp()
+
+ Compute the sample size for each variant in the genotype matrix, accounting for potential missing values. Raises NotImplementedError if the method is not implemented in the subclass.

+ drop_duplicated_snps()
+
+ A convenience method to drop variants with duplicated SNP rsIDs.

+ estimate_memory_allocation(dtype=np.float32)
+
+ Returns an estimate of the memory allocation for the genotype matrix in megabytes, assuming the given data type for its entries.
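+ Since the estimate is simply n * m * itemsize / 1024**2, a quick sanity check (reusing g from the earlier sketch):
+
+ import numpy as np
+
+ # For 10,000 samples and 50,000 variants at float32 (4 bytes per entry), the
+ # estimate is 10_000 * 50_000 * 4 / 1024**2, i.e. roughly 1907 MB.
+ print(g.estimate_memory_allocation())         # default: np.float32
+ print(g.estimate_memory_allocation(np.int8))  # 4x smaller footprint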

+ filter_by_allele_frequency(min_maf=None, min_mac=1)
+
+ Filter variants by minimum minor allele frequency or minor allele count cutoffs.
+
+ Parameters:
+
+ • min_maf (default None): Minimum minor allele frequency.
+ • min_mac (default 1): Minimum minor allele count.
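+ For example, keeping only variants whose allele frequency lies in [0.01, 0.99] (again reusing g from the earlier sketch):
+
+ g.filter_by_allele_frequency(min_maf=0.01)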

+ filter_samples(keep_samples=None, keep_file=None)
+
+ Filter samples from the genotype matrix. User must specify either a list of samples to keep or the path to a plink-style file with the list of samples to keep.
+
+ Parameters:
+
+ • keep_samples (default None): A list (or array) of sample IDs to keep in the genotype matrix.
+ • keep_file (default None): The path to a file with the list of samples to keep.

+ filter_snps(extract_snps=None, extract_file=None)
+
+ Filter variants from the genotype matrix. User must specify either a list of variants to extract or the path to a plink-style file with the list of variants to extract.
+
+ Parameters:
+
+ • extract_snps (default None): A list (or array) of SNP IDs to keep in the genotype matrix.
+ • extract_file (default None): The path to a file with the list of variants to extract.
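+ A short sketch combining the filters above (the rsIDs and file names are placeholders):
+
+ # Keep a predefined subset of variants, from an in-memory list or, alternatively, a file:
+ g.filter_snps(extract_snps=['rs7412', 'rs429358'])
+ # g.filter_snps(extract_file='variants.keep')
+
+ # Drop duplicated rsIDs and restrict to a subset of samples:
+ g.drop_duplicated_snps()
+ g.filter_samples(keep_file='samples.keep')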

+ from_file(file_path, temp_dir='temp', **kwargs) (classmethod)
+
+ Initialize a genotype matrix object by passing a file path plus other keyword arguments. In the base class, this raises NotImplementedError; see the subclasses below for concrete implementations.
+
+ Parameters:
+
+ • file_path (str, required): The path to the plink BED file.
+ • temp_dir (str, default 'temp'): The directory where temporary files will be stored.
+ • kwargs (default {}): Additional keyword arguments.

+ get_snp_attribute(attr)
+
+ Parameters: attr (required): The name of the attribute to extract from the SNP table.
+
+ Returns: The values of the given attribute for each variant in the genotype matrix, or None if the attribute is not present in the SNP table.

+ get_snp_table(col_subset=None)
+
+ A convenience method to extract SNP-related information from the genotype matrix.
+
+ Parameters: col_subset (default None): A list of columns to extract from the SNP table.
+
+ Returns: A pandas DataFrame with the requested columns.
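+ For example, reusing g from the earlier sketch (MAF and N are computed on the fly if they are not already present):
+
+ snp_df = g.get_snp_table(col_subset=['SNP', 'MAF', 'N'])
+ print(snp_df.head())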

+ perform_gwas(**gwa_kwargs)
+
+ Perform genome-wide association testing of all variants against the phenotype. Raises NotImplementedError if the method is not implemented in the subclass.
+
+ Parameters: gwa_kwargs (default {}): Keyword arguments to pass to the GWA functions. Consult stats.gwa.utils for relevant keyword arguments for each backend.
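+ With the xarray-backed matrix from the earlier sketch (and assuming a phenotype is attached to its sample table), the call is simply:
+
+ sumstats = g.perform_gwas()  # returns a summary statistics table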

+ score(beta, standardize_genotype=False)
+
+ Perform linear scoring, i.e. multiply the genotype matrix by the vector of effect sizes beta. Raises NotImplementedError if the method is not implemented in the subclass.
+
+ Parameters:
+
+ • beta (required): A vector of effect sizes for each variant in the genotype matrix.
+ • standardize_genotype (default False): If True, standardize the genotype matrix when computing the score.

+ set_sample_table(sample_table)
+
+ A convenience method to set the sample table for the genotype matrix. This may be useful for syncing sample tables across GenotypeMatrix objects corresponding to different chromosomes or genomic regions.
+
+ Parameters: sample_table (required): An instance of SampleTable or a pandas DataFrame containing information about the samples in the genotype matrix.
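+ A brief sketch of the syncing use case (g1 and g2 are hypothetical per-chromosome matrices):
+
+ g1.filter_samples(keep_file='samples.keep')  # subset samples on one chromosome
+ g2.set_sample_table(g1.sample_table)         # propagate the same subset to the other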

+ split_by_chromosome()
+
+ Split the genotype matrix by chromosome, so that we have a separate GenotypeMatrix object for each chromosome. This method returns a dictionary where the key is the chromosome number and the value is the GenotypeMatrix object for that chromosome.
+
+ Returns: A dictionary of GenotypeMatrix objects, one for each chromosome.
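+ For example, reusing g from the earlier sketch:
+
+ chrom_matrices = g.split_by_chromosome()
+ for chrom, g_chrom in chrom_matrices.items():
+     print(chrom, g_chrom.n_snps)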
+ plinkBEDGenotypeMatrix
+
+ Bases: GenotypeMatrix
+
+ A class that defines methods and interfaces for interacting with genotype matrices using the plink2 software. This class provides a convenient interface to perform various computations on genotype matrices stored in the plink BED format.
+
+ This class inherits all the attributes of the GenotypeMatrix class.
+ Source code in magenpy/GenotypeMatrix.py +
class plinkBEDGenotypeMatrix(GenotypeMatrix):
+    """
+    A class that defines methods and interfaces for interacting with genotype matrices
+    using `plink2` software. This class provides a convenient interface to perform various
+    computations on genotype matrices stored in the plink BED format.
+
+    This class inherits all the attributes of the `GenotypeMatrix` class.
+    """
+
+    def __init__(self,
+                 sample_table=None,
+                 snp_table=None,
+                 temp_dir='temp',
+                 bed_file=None,
+                 genome_build=None,
+                 threads=1):
+        """
+        Initialize a `plinkBEDGenotypeMatrix` object.
+
+        :param sample_table: A table containing information about the samples in the genotype matrix.
+        :param snp_table: A table containing information about the genetic variants in the genotype matrix.
+        :param temp_dir: The directory where temporary files will be stored (if needed).
+        :param bed_file: The path to the plink BED file containing the genotype matrix.
+        :param genome_build: The genome build or assembly under which the SNP coordinates are defined.
+        :param threads: The number of threads to use for parallel computations.
+        """
+
+        super().__init__(sample_table=sample_table,
+                         snp_table=snp_table,
+                         temp_dir=temp_dir,
+                         bed_file=bed_file,
+                         genome_build=genome_build,
+                         threads=threads)
+
+        if self.bed_file is not None:
+            self.bed_file = self.bed_file.replace('.bed', '')
+
+        if self.sample_table is None and self.bed_file:
+            self.sample_table = SampleTable(parse_fam_file(self.bed_file))
+
+        if self.snp_table is None and self.bed_file:
+            self.snp_table = parse_bim_file(self.bed_file)
+
+    @classmethod
+    def from_file(cls, file_path, temp_dir='temp', **kwargs):
+        """
+        A convenience method to create a `plinkBEDGenotypeMatrix` object by
+         providing a path to a PLINK BED file.
+
+        :param file_path: The path to the plink BED file.
+        :param temp_dir: The directory where temporary files will be stored.
+        :param kwargs: Additional keyword arguments.
+        """
+
+        p_gt = cls(bed_file=file_path, temp_dir=temp_dir, **kwargs)
+
+        return p_gt
+
+    def score(self, beta, standardize_genotype=False):
+        """
+        Perform linear scoring on the genotype matrix. This function takes a vector (or matrix) of
+        effect sizes and returns the matrix-vector or matrix-matrix product of the genotype matrix
+        multiplied by the effect sizes.
+
+        This can be used for polygenic score calculation or projecting the genotype matrix.
+
+        :param beta: A vector or matrix of effect sizes for each variant in the genotype matrix.
+        :param standardize_genotype: If True, standardize the genotype when computing the polygenic score.
+
+        :return: The polygenic score (PGS) for each sample in the genotype matrix.
+        """
+
+        from .stats.score.utils import score_plink2
+
+        # Create a temporary directory where we store intermediate results:
+        tmp_score_dir = tempfile.TemporaryDirectory(dir=self.temp_dir, prefix='score_')
+        self.cleanup_dir_list.append(tmp_score_dir)
+
+        return score_plink2(self, beta, standardize_genotype=standardize_genotype, temp_dir=tmp_score_dir.name)
+
+    def perform_gwas(self, **gwa_kwargs):
+        """
+        Perform genome-wide association testing of all variants against the phenotype.
+        This method calls specialized functions that, in turn, call `plink2` to perform
+        the association testing.
+
+        :return: A Summary statistics table containing the results of the association testing.
+        """
+
+        from .stats.gwa.utils import perform_gwa_plink2
+
+        # Create a temporary directory where we store intermediate results:
+        tmp_gwas_dir = tempfile.TemporaryDirectory(dir=self.temp_dir, prefix='gwas_')
+        self.cleanup_dir_list.append(tmp_gwas_dir)
+
+        return perform_gwa_plink2(self, temp_dir=tmp_gwas_dir.name, **gwa_kwargs)
+
+    def compute_allele_frequency(self):
+        """
+        Compute the allele frequency of each variant or SNP in the genotype matrix.
+        This method calls specialized functions that, in turn, call `plink2` to compute
+        allele frequency.
+        """
+
+        from .stats.variant.utils import compute_allele_frequency_plink2
+
+        # Create a temporary directory where we store intermediate results:
+        tmp_freq_dir = tempfile.TemporaryDirectory(dir=self.temp_dir, prefix='freq_')
+        self.cleanup_dir_list.append(tmp_freq_dir)
+
+        self.snp_table['MAF'] = compute_allele_frequency_plink2(self, temp_dir=tmp_freq_dir.name)
+
+    def compute_sample_size_per_snp(self):
+        """
+        Compute the sample size for each variant in the genotype matrix, accounting for
+        potential missing values.
+
+        This method calls specialized functions that, in turn, call `plink2` to compute sample
+        size per variant.
+        """
+
+        from .stats.variant.utils import compute_sample_size_per_snp_plink2
+
+        # Create a temporary directory where we store intermediate results:
+        tmp_miss_dir = tempfile.TemporaryDirectory(dir=self.temp_dir, prefix='miss_')
+        self.cleanup_dir_list.append(tmp_miss_dir)
+
+        self.snp_table['N'] = compute_sample_size_per_snp_plink2(self, temp_dir=tmp_miss_dir.name)
+
+    def split_by_chromosome(self):
+        """
+        Split the genotype matrix by chromosome.
+        :return: A dictionary of `plinkBEDGenotypeMatrix` objects, one for each chromosome.
+        """
+
+        split = super().split_by_chromosome()
+
+        for c, gt in split.items():
+            gt.bed_file = self.bed_file
+
+        return split
+
+
+ Method summaries (the full source code appears in the class listing above):
+
+ • __init__(sample_table=None, snp_table=None, temp_dir='temp', bed_file=None, genome_build=None, threads=1): Initialize a plinkBEDGenotypeMatrix object; the parameters have the same meaning as in GenotypeMatrix.__init__.
+ • compute_allele_frequency(): Compute the allele frequency of each variant or SNP; calls specialized functions that, in turn, call plink2.
+ • compute_sample_size_per_snp(): Compute the sample size for each variant, accounting for potential missing values; calls specialized functions that, in turn, call plink2.
+ • from_file(file_path, temp_dir='temp', **kwargs) (classmethod): A convenience method to create a plinkBEDGenotypeMatrix object by providing a path to a PLINK BED file.
+ • perform_gwas(**gwa_kwargs): Perform genome-wide association testing of all variants against the phenotype via plink2. Returns a summary statistics table with the results.
+ • score(beta, standardize_genotype=False): Perform linear scoring, i.e. the matrix-vector or matrix-matrix product of the genotype matrix with the effect sizes, e.g. for polygenic score calculation. Returns the polygenic score (PGS) for each sample.
+ • split_by_chromosome(): Split the genotype matrix by chromosome. Returns a dictionary of plinkBEDGenotypeMatrix objects, one for each chromosome.
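+ A minimal end-to-end sketch with the plink2 backend (paths are placeholders; plink2 must be installed and discoverable, and the .fam file is assumed to carry a phenotype):
+
+ import numpy as np
+ from magenpy.GenotypeMatrix import plinkBEDGenotypeMatrix
+
+ p_gt = plinkBEDGenotypeMatrix.from_file("genotype/chr22")  # prefix of the .bed/.bim/.fam trio
+
+ sumstats = p_gt.perform_gwas()             # association testing via plink2
+
+ beta = np.random.normal(size=p_gt.n_snps)  # placeholder effect sizes
+ pgs = p_gt.score(beta)                     # polygenic score per sample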
+ xarrayGenotypeMatrix
+
+ Bases: GenotypeMatrix
+
+ A class that defines methods and interfaces for interacting with genotype matrices using the xarray library. In particular, the class leverages functionality provided by the pandas-plink package to represent on-disk genotype matrices as chunked multidimensional arrays that can be queried and manipulated efficiently and in parallel.
+
+ This class inherits all the attributes of the GenotypeMatrix class.
+
+ Attributes: xr_mat: The xarray object representing the genotype matrix.
+ Source code in magenpy/GenotypeMatrix.py +
class xarrayGenotypeMatrix(GenotypeMatrix):
+    """
+    A class that defines methods and interfaces for interacting with genotype matrices
+    using the `xarray` library. In particular, the class leverages functionality provided by
+    the `pandas-plink` package to represent on-disk genotype matrices as chunked multidimensional
+    arrays that can be queried and manipulated efficiently and in parallel.
+
+    This class inherits all the attributes of the `GenotypeMatrix` class.
+
+    :ivar xr_mat: The `xarray` object representing the genotype matrix.
+
+    """
+
+    def __init__(self,
+                 sample_table=None,
+                 snp_table=None,
+                 bed_file=None,
+                 temp_dir='temp',
+                 xr_mat=None,
+                 genome_build=None,
+                 threads=1):
+        """
+        Initialize an xarrayGenotypeMatrix object.
+
+        :param sample_table: A table containing information about the samples in the genotype matrix.
+        :param snp_table: A table containing information about the genetic variants in the genotype matrix.
+        :param bed_file: The path to the plink BED file containing the genotype matrix.
+        :param temp_dir: The directory where temporary files will be stored (if needed).
+        :param xr_mat: The xarray object representing the genotype matrix.
+        :param genome_build: The genome build or assembly under which the SNP coordinates are defined.
+        :param threads: The number of threads to use for parallel computations.
+        """
+
+        super().__init__(sample_table=sample_table,
+                         snp_table=snp_table,
+                         temp_dir=temp_dir,
+                         bed_file=bed_file,
+                         genome_build=genome_build,
+                         threads=threads)
+
+        # xarray matrix object, as defined by pandas-plink:
+        self.xr_mat = xr_mat
+
+    @classmethod
+    def from_file(cls, file_path, temp_dir='temp', **kwargs):
+        """
+        Create a GenotypeMatrix object using a PLINK BED file with the help
+        of the data structures defined in `pandas_plink`. The genotype matrix
+        will be represented implicitly in an `xarray` object, and we will use it
+        to perform various computations. This method is a utility function to
+        construct the genotype matrix object from a plink BED file.
+
+        :param file_path: Path to the plink BED file.
+        :param temp_dir: The directory where the temporary files will be stored.
+        :param kwargs: Additional keyword arguments.
+        """
+
+        from pandas_plink import read_plink1_bin
+
+        try:
+            xr_gt = read_plink1_bin(file_path + ".bed", ref="a0", verbose=False)
+        except ValueError:
+            xr_gt = read_plink1_bin(file_path, ref="a0", verbose=False)
+
+        # Set the sample table:
+        sample_table = xr_gt.sample.coords.to_dataset().to_dataframe()
+        sample_table.columns = ['FID', 'IID', 'fatherID', 'motherID', 'sex', 'phenotype']
+        sample_table.reset_index(inplace=True, drop=True)
+        sample_table = sample_table.astype({
+            'FID': str,
+            'IID': str,
+            'fatherID': str,
+            'motherID': str,
+            'sex': float,
+            'phenotype': float
+        })
+
+        sample_table['phenotype'] = sample_table['phenotype'].replace({-9.: np.nan})
+
+        # Set the snp table:
+        snp_table = xr_gt.variant.coords.to_dataset().to_dataframe()
+        snp_table.columns = ['CHR', 'SNP', 'cM', 'POS', 'A1', 'A2']
+        snp_table.reset_index(inplace=True, drop=True)
+        snp_table = snp_table.astype({
+            'CHR': int,
+            'SNP': str,
+            'cM': np.float32,
+            'POS': np.int32,
+            'A1': str,
+            'A2': str
+        })
+
+        g_mat = cls(sample_table=SampleTable(sample_table),
+                    snp_table=snp_table,
+                    temp_dir=temp_dir,
+                    bed_file=file_path,
+                    xr_mat=xr_gt,
+                    **kwargs)
+
+        return g_mat
+
+    def set_sample_table(self, sample_table):
+        """
+        A convenience method to set the sample table for the genotype matrix.
+        This is useful for cases when we need to sync the sample table across chromosomes.
+
+        :param sample_table: An instance of SampleTable or a pandas dataframe containing
+        information about the samples in the genotype matrix.
+        """
+
+        super().set_sample_table(sample_table)
+
+        try:
+            if self.n != self.xr_mat.shape[0]:
+                self.xr_mat = self.xr_mat.sel(sample=self.samples)
+        except AttributeError:
+            pass
+
+    def filter_snps(self, extract_snps=None, extract_file=None):
+        """
+        Filter variants from the genotype matrix. User must specify either a list of variants to
+        extract or the path to a file with the list of variants to extract.
+
+        :param extract_snps: A list or array of SNP rsIDs to keep in the genotype matrix.
+        :param extract_file: The path to a file with the list of variants to extract.
+        """
+
+        super().filter_snps(extract_snps=extract_snps, extract_file=extract_file)
+        self.xr_mat = self.xr_mat.sel(variant=np.isin(self.xr_mat.variant.coords['snp'], self.snps))
+
+    def filter_samples(self, keep_samples=None, keep_file=None):
+        """
+        Filter samples from the genotype matrix.
+        User must specify either a list of samples to keep or the path to a file with the list of samples to keep.
+
+        :param keep_samples: A list (or array) of sample IDs to keep in the genotype matrix.
+        :param keep_file: The path to a file with the list of samples to keep.
+        """
+
+        super().filter_samples(keep_samples=keep_samples, keep_file=keep_file)
+        self.xr_mat = self.xr_mat.sel(sample=self.samples)
+
+    def to_numpy(self, dtype=np.int8):
+        """
+        Convert the genotype matrix to a numpy array.
+        :param dtype: The data type of the numpy array. Default: np.int8
+
+        :return: A numpy array representation of the genotype matrix.
+        """
+
+        return self.xr_mat.data.astype(dtype).compute()
+
+    def to_csr(self, dtype=np.int8):
+        """
+        Convert the genotype matrix to a scipy sparse CSR matrix.
+        :param dtype: The data type of the scipy sparse matrix. Default: np.int8
+
+        :return: A `scipy` sparse CSR matrix representation of the genotype matrix.
+        """
+
+        mat = self.to_numpy(dtype=dtype)
+
+        from scipy.sparse import csr_matrix
+
+        return csr_matrix(mat)
+
+    def score(self, beta, standardize_genotype=False, skip_na=True):
+        """
+        Perform linear scoring on the genotype matrix.
+        :param beta: A vector or matrix of effect sizes for each variant in the genotype matrix.
+        :param standardize_genotype: If True, standardize the genotype when computing the polygenic score.
+        :param skip_na: If True, skip missing values when computing the polygenic score.
+
+        :return: The polygenic score (PGS) for each sample in the genotype matrix.
+
+        """
+
+        import dask.array as da
+
+        chunked_beta = da.from_array(beta, chunks=self.xr_mat.data.chunksize[1])
+
+        if standardize_genotype:
+            from .stats.transforms.genotype import standardize
+            pgs = da.dot(standardize(self.xr_mat).data, chunked_beta).compute()
+        else:
+            if skip_na:
+                pgs = da.dot(da.nan_to_num(self.xr_mat.data), chunked_beta).compute()
+            else:
+                pgs = da.dot(self.xr_mat.fillna(self.maf).data, chunked_beta).compute()
+
+        return pgs
+
+    def perform_gwas(self, **gwa_kwargs):
+        """
+        A convenience method that calls specialized utility functions that perform
+        genome-wide association testing of all variants against the phenotype.
+
+        :return: A summary statistics table containing the results of the association testing.
+        """
+
+        from .stats.gwa.utils import perform_gwa_xarray
+        return perform_gwa_xarray(self, **gwa_kwargs)
+
+    def compute_allele_frequency(self):
+        """
+        A convenience method that calls specialized utility functions that
+        compute the allele frequency of each variant or SNP in the genotype matrix.
+        """
+        self.snp_table['MAF'] = (self.xr_mat.sum(axis=0) / (2. * self.n_per_snp)).compute().values
+
+    def compute_sample_size_per_snp(self):
+        """
+        A convenience method that calls specialized utility functions that compute
+        the sample size for each variant in the genotype matrix, accounting for
+        potential missing values.
+        """
+        self.snp_table['N'] = self.xr_mat.shape[0] - self.xr_mat.isnull().sum(axis=0).compute().values
+
+    def split_by_chromosome(self):
+        """
+        Split the genotype matrix by chromosome.
+        :return: A dictionary of `xarrayGenotypeMatrix` objects, one for each chromosome.
+        """
+        split = super().split_by_chromosome()
+
+        for c, gt in split.items():
+            gt.xr_mat = self.xr_mat
+            if len(split) > 1:
+                gt.filter_snps(extract_snps=gt.snps)
+
+        return split
+
__init__(sample_table=None, snp_table=None, bed_file=None, temp_dir='temp', xr_mat=None, genome_build=None, threads=1)

Initialize an xarrayGenotypeMatrix object.

Parameters:

    sample_table (default: None)
        A table containing information about the samples in the genotype matrix.
    snp_table (default: None)
        A table containing information about the genetic variants in the genotype matrix.
    bed_file (default: None)
        The path to the plink BED file containing the genotype matrix.
    temp_dir (default: 'temp')
        The directory where temporary files will be stored (if needed).
    xr_mat (default: None)
        The xarray object representing the genotype matrix.
    genome_build (default: None)
        The genome build or assembly under which the SNP coordinates are defined.
    threads (default: 1)
        The number of threads to use for parallel computations.

Source code in magenpy/GenotypeMatrix.py
def __init__(self,
+             sample_table=None,
+             snp_table=None,
+             bed_file=None,
+             temp_dir='temp',
+             xr_mat=None,
+             genome_build=None,
+             threads=1):
+    """
+    Initialize an xarrayGenotypeMatrix object.
+
+    :param sample_table: A table containing information about the samples in the genotype matrix.
+    :param snp_table: A table containing information about the genetic variants in the genotype matrix.
+    :param bed_file: The path to the plink BED file containing the genotype matrix.
+    :param temp_dir: The directory where temporary files will be stored (if needed).
+    :param xr_mat: The xarray object representing the genotype matrix.
+    :param genome_build: The genome build or assembly under which the SNP coordinates are defined.
+    :param threads: The number of threads to use for parallel computations.
+    """
+
+    super().__init__(sample_table=sample_table,
+                     snp_table=snp_table,
+                     temp_dir=temp_dir,
+                     bed_file=bed_file,
+                     genome_build=genome_build,
+                     threads=threads)
+
+    # xarray matrix object, as defined by pandas-plink:
+    self.xr_mat = xr_mat

compute_allele_frequency()

A convenience method that calls specialized utility functions that
compute the allele frequency of each variant or SNP in the genotype matrix.


compute_sample_size_per_snp()

A convenience method that calls specialized utility functions that compute
the sample size for each variant in the genotype matrix, accounting for
potential missing values.

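
As a quick illustration, the two methods above fill in the 'MAF' and 'N' columns of the
SNP table. A minimal sketch (the import path follows the source file shown above; the
plink file prefix is hypothetical):

    from magenpy.GenotypeMatrix import xarrayGenotypeMatrix

    g = xarrayGenotypeMatrix.from_file("data/1000G_eur_chr22")

    # Compute per-SNP sample sizes first, then allele frequencies (which divide by them):
    g.compute_sample_size_per_snp()   # adds the 'N' column to g.snp_table
    g.compute_allele_frequency()      # adds the 'MAF' column to g.snp_table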

filter_samples(keep_samples=None, keep_file=None)

Filter samples from the genotype matrix.
User must specify either a list of samples to keep or the path to a file with the list of samples to keep.

Parameters:

    keep_samples (default: None)
        A list (or array) of sample IDs to keep in the genotype matrix.
    keep_file (default: None)
        The path to a file with the list of samples to keep.

filter_snps(extract_snps=None, extract_file=None)

Filter variants from the genotype matrix. User must specify either a list of variants to
extract or the path to a file with the list of variants to extract.

Parameters:

    extract_snps (default: None)
        A list or array of SNP rsIDs to keep in the genotype matrix.
    extract_file (default: None)
        The path to a file with the list of variants to extract.
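
For instance, a minimal sketch of subsetting the genotype matrix g from the example
above (the file path and rsIDs are hypothetical):

    g.filter_samples(keep_file="data/keep_samples.txt")   # plink-style keep file
    g.filter_snps(extract_snps=["rs12345", "rs67890"])

Both calls filter the sample/SNP tables via the parent class and then subset the
underlying xarray matrix to match.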

from_file(file_path, temp_dir='temp', **kwargs) (classmethod)

Create a GenotypeMatrix object using a PLINK BED file with the help
of the data structures defined in pandas_plink. The genotype matrix
will be represented implicitly in an xarray object, and we will use it
to perform various computations. This method is a utility function to
construct the genotype matrix object from a plink BED file.

Parameters:

    file_path (required)
        Path to the plink BED file.
    temp_dir (default: 'temp')
        The directory where the temporary files will be stored.
    kwargs (default: {})
        Additional keyword arguments.
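
A minimal usage sketch (the plink file prefix below is hypothetical):

    from magenpy.GenotypeMatrix import xarrayGenotypeMatrix

    # The path may be given with or without the .bed extension:
    g = xarrayGenotypeMatrix.from_file("data/1000G_eur_chr22")

    print(g.n, len(g.snps))   # number of samples, number of variants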

perform_gwas(**gwa_kwargs)

A convenience method that calls specialized utility functions that perform
genome-wide association testing of all variants against the phenotype.

Returns:

    A summary statistics table containing the results of the association testing.
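
For example, a sketch of running the association tests (this assumes the sample table
carries a phenotype, e.g. parsed from the .fam file; the .table attribute on the
returned object is an assumption for illustration):

    sumstats = g.perform_gwas()     # association test for every variant
    print(sumstats.table.head())    # inspect the resulting summary statistics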

score(beta, standardize_genotype=False, skip_na=True)

Perform linear scoring on the genotype matrix.

Parameters:

    beta (required)
        A vector or matrix of effect sizes for each variant in the genotype matrix.
    standardize_genotype (default: False)
        If True, standardize the genotype when computing the polygenic score.
    skip_na (default: True)
        If True, skip missing values when computing the polygenic score.

Returns:

    The polygenic score (PGS) for each sample in the genotype matrix.
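
A short sketch of linear scoring with randomly drawn effect sizes (for illustration
only; real effect sizes would come from GWAS or a PRS method):

    import numpy as np

    beta = np.random.normal(scale=0.01, size=len(g.snps))   # one effect size per variant
    pgs = g.score(beta, standardize_genotype=True)
    print(pgs.shape)   # one polygenic score per sample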

set_sample_table(sample_table)

A convenience method to set the sample table for the genotype matrix.
This is useful for cases when we need to sync the sample table across chromosomes.

Parameters:

    sample_table (required)
        An instance of SampleTable or a pandas dataframe containing information about the samples in the genotype matrix.

split_by_chromosome()

Split the genotype matrix by chromosome.

Returns:

    A dictionary of xarrayGenotypeMatrix objects, one for each chromosome.
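
For example (a sketch, assuming g spans several chromosomes):

    split = g.split_by_chromosome()
    for chrom, g_chr in split.items():
        print(chrom, len(g_chr.snps))   # per-chromosome variant counts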

to_csr(dtype=np.int8)

Convert the genotype matrix to a scipy sparse CSR matrix.

Parameters:

    dtype (default: np.int8)
        The data type of the scipy sparse matrix.

Returns:

    A scipy sparse CSR matrix representation of the genotype matrix.

to_numpy(dtype=np.int8)

Convert the genotype matrix to a numpy array.

Parameters:

    dtype (default: np.int8)
        The data type of the numpy array.

Returns:

    A numpy array representation of the genotype matrix.
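
A sketch of materializing the genotype calls (note that both conversions load the full
matrix into memory):

    x_dense = g.to_numpy(dtype=np.int8)   # dense samples-by-variants allele counts
    x_sparse = g.to_csr()                 # scipy sparse CSR representation
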
\ No newline at end of file
diff --git a/api/LDMatrix/index.html b/api/LDMatrix/index.html
new file mode 100644
index 0000000..b8f59ca
--- /dev/null
+++ b/api/LDMatrix/index.html
@@ -0,0 +1,9918 @@
+ LDMatrix - magenpy

LDMatrix

Bases: object

A class that represents Linkage-Disequilibrium (LD) matrices, which record
the SNP-by-SNP pairwise correlations in a sample of genetic data. The class
provides various functionalities for initializing, storing, loading, and
performing computations with LD matrices. The LD matrices are stored in a
hierarchical format using the Zarr library, which allows for efficient
storage and retrieval of the data.

The class provides the following functionalities:

  • Initialize an LDMatrix object from plink's LD table files.
  • Initialize an LDMatrix object from a sparse CSR matrix.
  • Initialize an LDMatrix object from a Zarr array store.
  • Compute LD scores for each SNP in the LD matrix.
  • Filter the LD matrix based on SNP indices or ranges.

The Zarr hierarchy is structured as follows:

  • chr_22.zarr: The Zarr group.
      • matrix: The subgroup containing the data of the LD matrix in Scipy Sparse CSR matrix format.
          • data: The array containing the non-zero entries of the LD matrix.
          • indptr: The array containing the index pointers for the CSR matrix.
      • metadata: The subgroup containing the metadata for variants included in the LD matrix.
          • snps: The array containing the SNP rsIDs.
          • a1: The array containing the alternative alleles.
          • a2: The array containing the reference alleles.
          • maf: The array containing the minor allele frequencies.
          • bp: The array containing the base pair positions.
          • cm: The array containing the centimorgan positions.
          • ldscore: The array containing the LD scores.
      • attrs: A JSON-style metadata object containing general information about how the LD matrix
        was calculated, including the chromosome number, sample size, genome build, LD estimator,
        and estimator properties.

Attributes:

    _zg
        The Zarr group object that stores the LD matrix and its metadata.
    _mat
        The in-memory CSR matrix object.
    in_memory
        A boolean flag indicating whether the LD matrix is in memory.
    is_symmetric
        A boolean flag indicating whether the LD matrix is symmetric.
    index
        An integer index for the current SNP in the LD matrix (useful for iterators).
    _mask
        A boolean mask for filtering the LD matrix.
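
As a quick orientation, a sketch of loading a pre-computed LD matrix and inspecting it
(the store path below is hypothetical):

    from magenpy.LDMatrix import LDMatrix

    ld_mat = LDMatrix.from_path("output/ld/chr_22.zarr")

    print(ld_mat.n_snps)          # number of variants in the matrix
    print(ld_mat.ld_estimator)    # e.g. 'windowed', 'block' or 'shrinkage'
    csr = ld_mat.csr_matrix       # load the data as an in-memory scipy CSR matrix
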
Source code in magenpy/LDMatrix.py
class LDMatrix(object):
+    """
+    A class that represents Linkage-Disequilibrium (LD) matrices, which record
+    the SNP-by-SNP pairwise correlations in a sample of genetic data. The class
+    provides various functionalities for initializing, storing, loading, and
+    performing computations with LD matrices. The LD matrices are stored in a
+    hierarchical format using the `Zarr` library, which allows for efficient
+    storage and retrieval of the data.
+
+    The class provides the following functionalities:
+
+    * Initialize an `LDMatrix` object from plink's LD table files.
+    * Initialize an `LDMatrix` object from a sparse CSR matrix.
+    * Initialize an `LDMatrix` object from a Zarr array store.
+    * Compute LD scores for each SNP in the LD matrix.
+    * Filter the LD matrix based on SNP indices or ranges.
+
+    The Zarr hierarchy is structured as follows:
+
+    * `chr_22.zarr`: The Zarr group.
+        * `matrix`: The subgroup containing the data of the LD matrix in Scipy Sparse CSR matrix format.
+            * `data`: The array containing the non-zero entries of the LD matrix.
+            * `indptr`: The array containing the index pointers for the CSR matrix.
+        * `metadata`: The subgroup containing the metadata for variants included in the LD matrix.
+            * `snps`: The array containing the SNP rsIDs.
+            * `a1`: The array containing the alternative alleles.
+            * `a2`: The array containing the reference alleles.
+            * `maf`: The array containing the minor allele frequencies.
+            * `bp`: The array containing the base pair positions.
+            * `cm`: The array containing the centimorgan positions.
+            * `ldscore`: The array containing the LD scores.
+        * `attrs`: A JSON-style metadata object containing general information about how the LD matrix
+        was calculated, including the chromosome number, sample size, genome build, LD estimator,
+        and estimator properties.
+
+    :ivar _zg: The Zarr group object that stores the LD matrix and its metadata.
+    :ivar _mat: The in-memory CSR matrix object.
+    :ivar in_memory: A boolean flag indicating whether the LD matrix is in memory.
+    :ivar is_symmetric: A boolean flag indicating whether the LD matrix is symmetric.
+    :ivar index: An integer index for the current SNP in the LD matrix (useful for iterators).
+    :ivar _mask: A boolean mask for filtering the LD matrix.
+
+    """
+
+    def __init__(self, zarr_group, symmetric=False):
+        """
+        Initialize an `LDMatrix` object from a Zarr group store.
+
+        :param zarr_group: The Zarr group object that stores the LD matrix.
+        :param symmetric: A boolean flag indicating whether to represent the LD matrix as symmetric.
+        """
+
+        # Checking the input for correct formatting:
+        # First, it has to be a Zarr group:
+        assert isinstance(zarr_group, zarr.hierarchy.Group)
+        # Second, it has to have a group called `matrix`:
+        assert 'matrix' in list(zarr_group.group_keys())
+
+        # Third, all the sparse array keys must be present:
+        arr_keys = list(zarr_group['matrix'].array_keys())
+        assert all([arr in arr_keys
+                    for arr in ('data', 'indptr')])
+
+        self._zg = zarr_group
+
+        self._mat = None
+        self.in_memory = False
+        self.is_symmetric = symmetric
+        self.index = 0
+
+        self._mask = None
+
+    @classmethod
+    def from_path(cls, ld_store_path):
+        """
+        Initialize an `LDMatrix` object from a pre-computed Zarr group store.
+        :param ld_store_path: The path to the Zarr array store on the filesystem.
+
+        !!! seealso "See Also"
+            * [from_dir][magenpy.LDMatrix.LDMatrix.from_dir]
+
+        """
+
+        for level in range(2):
+            try:
+                ld_group = zarr.open_group(ld_store_path, mode='r')
+                return cls(ld_group)
+            except zarr.hierarchy.GroupNotFoundError as e:
+                if level < 1:
+                    ld_store_path = osp.dirname(ld_store_path)
+                else:
+                    raise e
+
+    @classmethod
+    def from_dir(cls, ld_store_path):
+        """
+        Initialize an `LDMatrix` object from a Zarr array store.
+        :param ld_store_path: The path to the Zarr array store on the filesystem.
+
+        !!! seealso "See Also"
+            * [from_path][magenpy.LDMatrix.LDMatrix.from_path]
+        """
+        return cls.from_path(ld_store_path)
+
+    @classmethod
+    def from_csr(cls,
+                 csr_mat,
+                 store_path,
+                 overwrite=False,
+                 dtype='int16',
+                 compressor_name='lz4',
+                 compression_level=5):
+        """
+        Initialize an LDMatrix object from a sparse CSR matrix.
+
+        :param csr_mat: The sparse CSR matrix.
+        :param store_path: The path to the Zarr LD store where the data will be stored.
+        :param overwrite: If True, it overwrites the LD store at `store_path`.
+        :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+        and integer quantized data types int8 and int16).
+        :param compressor_name: The name of the compressor or compression algorithm to use with Zarr.
+        :param compression_level: The compression level to use with the compressor (1-9).
+        """
+
+        dtype = np.dtype(dtype)
+
+        # Get the upper triangular part of the matrix:
+        triu_mat = triu(csr_mat, k=1, format='csr')
+
+        # Check that the non-zeros are contiguous around the diagonal with no gaps.
+        # If there are gaps, eliminate them or raise an error.
+        if np.diff(triu_mat.indices).max() > 1:
+            # TODO: Figure out a way to fix this automatically for the user?
+            raise ValueError("The non-zero entries of the LD matrix are not contiguous around the diagonal.")
+
+        # Create hierarchical storage with zarr groups:
+        store = zarr.DirectoryStore(store_path)
+        z = zarr.group(store=store, overwrite=overwrite)
+
+        # Create a compressor object:
+        compressor = zarr.Blosc(cname=compressor_name, clevel=compression_level)
+
+        # First sub-hierarchy stores the information for the sparse LD matrix:
+        mat = z.create_group('matrix')
+        if np.issubdtype(dtype, np.integer):
+            mat.array('data', quantize(triu_mat.data, int_dtype=dtype), dtype=dtype, compressor=compressor)
+        else:
+            mat.array('data', triu_mat.data.astype(dtype), dtype=dtype, compressor=compressor)
+
+        # Store the index pointer:
+        mat.array('indptr', triu_mat.indptr,
+                  dtype=np.int32, compressor=compressor)
+
+        return cls(z)
+
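+    # Example (sketch): build an LD store from an existing SciPy CSR matrix `R`
+    # of pairwise correlations (hypothetical store path):
+    #
+    #     ld_mat = LDMatrix.from_csr(R, store_path='ld/chr_22.zarr', dtype='int16')
+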
+    @classmethod
+    def from_plink_table(cls,
+                         plink_ld_file,
+                         snps,
+                         store_path,
+                         pandas_chunksize=None,
+                         overwrite=False,
+                         dtype='int16',
+                         compressor_name='lz4',
+                         compression_level=5):
+        """
+        Construct a Zarr LD matrix using output tables from plink1.9.
+        This class method takes the following inputs:
+
+        :param plink_ld_file: The path to the plink LD table file.
+        :param snps: An iterable containing the list of SNPs in the LD matrix.
+        :param store_path: The path to the Zarr LD store.
+        :param pandas_chunksize: If the LD table is large, provide chunk size
+        (i.e. number of rows to process at each step) to keep memory footprint manageable.
+        :param overwrite: If True, it overwrites the LD store at `store_path`.
+        :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+        and integer quantized data types int8 and int16).
+        :param compressor_name: The name of the compressor or compression algorithm to use with Zarr.
+        :param compression_level: The compression level to use with the compressor (1-9).
+        """
+
+        dtype = np.dtype(dtype)
+
+        # Create hierarchical storage with zarr groups:
+        store = zarr.DirectoryStore(store_path)
+        z = zarr.group(store=store, overwrite=overwrite)
+
+        # Create a compressor object:
+        compressor = zarr.Blosc(cname=compressor_name, clevel=compression_level)
+
+        # First sub-hierarchy stores the information for the sparse LD matrix:
+        mat = z.create_group('matrix')
+        mat.empty('data', shape=len(snps)**2, dtype=dtype, compressor=compressor)
+
+        # Create a chunked iterator with pandas:
+        # Chunk size will correspond to the average chunk size for the Zarr array:
+        ld_chunks = pd.read_csv(plink_ld_file,
+                                sep=r'\s+',
+                                usecols=['SNP_A', 'R'],
+                                engine='c',
+                                chunksize=pandas_chunksize,
+                                dtype={'SNP_A': str, 'R': np.float32})
+
+        if pandas_chunksize is None:
+            ld_chunks = [ld_chunks]
+
+        # Create a dictionary mapping SNPs to their indices:
+        snp_dict = dict(zip(snps, np.arange(len(snps))))
+
+        indptr_counts = np.zeros(len(snps), dtype=np.int32)
+
+        total_len = 0
+
+        # For each chunk in the LD file:
+        for ld_chunk in ld_chunks:
+
+            # Create an indexed LD chunk:
+            ld_chunk['row_index'] = ld_chunk['SNP_A'].map(snp_dict)
+
+            # Add LD data to the zarr array:
+            if np.issubdtype(dtype, np.integer):
+                mat['data'][total_len:total_len + len(ld_chunk)] = quantize(ld_chunk['R'].values, int_dtype=dtype)
+            else:
+                mat['data'][total_len:total_len + len(ld_chunk)] = ld_chunk['R'].values.astype(dtype)
+
+            total_len += len(ld_chunk)
+
+            # Group by the row index:
+            grouped_ridx = ld_chunk.groupby('row_index').size()
+
+            # Add the number of entries to indptr_counts:
+            indptr_counts[grouped_ridx.index] += grouped_ridx.values
+
+        # Get the final indptr by computing cumulative sum:
+        indptr = np.insert(np.cumsum(indptr_counts), 0, 0)
+        # Store indptr in the zarr group:
+        mat.array('indptr', indptr, dtype=np.int32, compressor=compressor)
+
+        # Resize the data array:
+        mat['data'].resize(total_len)
+
+        return cls(z)
+
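+    # Example (sketch): build an LD store from a plink1.9 LD table, chunking the read
+    # to bound memory (hypothetical paths; `snp_ids` lists the variants in order):
+    #
+    #     ld_mat = LDMatrix.from_plink_table('ld/chr_22.ld', snps=snp_ids,
+    #                                        store_path='ld/chr_22.zarr',
+    #                                        pandas_chunksize=100_000)
+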
+    @classmethod
+    def from_dense_zarr_matrix(cls,
+                               dense_zarr,
+                               ld_boundaries,
+                               store_path,
+                               overwrite=False,
+                               delete_original=False,
+                               dtype='int16',
+                               compressor_name='lz4',
+                               compression_level=5):
+        """
+         Initialize a new LD matrix object using a Zarr array object. This method is
+         useful for converting a dense LD matrix computed using Dask (or other distributed computing
+         software) to a sparse or banded one.
+
+         :param dense_zarr: The path to the dense Zarr array object.
+         :param ld_boundaries: The LD boundaries for each SNP in the LD matrix (delineates the indices of
+            the leftmost and rightmost neighbors of each SNP).
+         :param store_path: The path where to store the new LD matrix.
+         :param overwrite: If True, it overwrites the LD store at `store_path`.
+         :param delete_original: If True, it deletes the original dense LD matrix.
+         :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+            and integer quantized data types int8 and int16).
+         :param compressor_name: The name of the compressor or compression algorithm to use with Zarr.
+         :param compression_level: The compression level to use with the compressor (1-9).
+        """
+
+        dtype = np.dtype(dtype)
+
+        # If dense_zarr is a path, rather than a Zarr Array object, then
+        # open it as a Zarr array object before proceeding:
+        if isinstance(dense_zarr, str):
+            if osp.isfile(osp.join(dense_zarr, '.zarray')):
+                dense_zarr = zarr.open(dense_zarr)
+            else:
+                raise FileNotFoundError
+
+        # Create hierarchical storage with zarr groups:
+        store = zarr.DirectoryStore(store_path)
+        z = zarr.group(store=store, overwrite=overwrite)
+
+        # Create a compressor object:
+        compressor = zarr.Blosc(cname=compressor_name, clevel=compression_level)
+
+        # First sub-hierarchy stores the information for the sparse LD matrix:
+        mat = z.create_group('matrix')
+        mat.empty('data', shape=dense_zarr.shape[0]**2, dtype=dtype, compressor=compressor)
+
+        num_rows = dense_zarr.shape[0]
+        chunk_size = dense_zarr.chunks[0]
+
+        indptr_counts = np.zeros(num_rows, dtype=int)
+
+        total_len = 0
+
+        for chunk_idx in range(int(np.ceil(num_rows / chunk_size))):
+
+            chunk_start = chunk_idx * chunk_size
+            chunk_end = min((chunk_idx + 1) * chunk_size, num_rows)
+
+            z_chunk = dense_zarr[chunk_start: chunk_end]
+
+            data = []
+
+            chunk_len = 0
+
+            for j in range(chunk_start, chunk_end):
+
+                data.append(
+                    z_chunk[j - chunk_start][j + 1:ld_boundaries[1, j]]
+                )
+                indptr_counts[j] = len(data[-1])
+                chunk_len += int(ld_boundaries[1, j] - (j+1))
+
+            # Add data + columns indices to zarr array:
+            concat_data = np.concatenate(data)
+
+            if np.issubdtype(dtype, np.integer):
+                mat['data'][total_len:total_len + chunk_len] = quantize(concat_data, int_dtype=dtype)
+            else:
+                mat['data'][total_len:total_len + chunk_len] = concat_data.astype(dtype)
+
+            total_len += chunk_len
+
+        # Get the final indptr by computing cumulative sum:
+        indptr = np.insert(np.cumsum(indptr_counts), 0, 0)
+        # Store indptr in the zarr array:
+        mat.array('indptr', indptr, compressor=compressor)
+
+        # Resize the data and indices arrays:
+        mat['data'].resize(total_len)
+
+        if delete_original:
+            from .stats.ld.utils import delete_ld_store
+            delete_ld_store(dense_zarr)
+
+        return cls(z)
+
+    @classmethod
+    def from_ragged_zarr_matrix(cls,
+                                ragged_zarr,
+                                store_path,
+                                overwrite=False,
+                                delete_original=False,
+                                dtype='int16',
+                                compressor_name='lz4',
+                                compression_level=5):
+        """
+        Initialize a new LD matrix object using a Zarr array object
+        conforming to the old LD Matrix format from magenpy v<=0.0.12.
+
+        This utility function will also copy some of the stored attributes
+        associated with the matrix in the old format.
+
+        :param ragged_zarr: The path to the ragged Zarr array object.
+        :param store_path: The path where to store the new LD matrix.
+        :param overwrite: If True, it overwrites the LD store at `store_path`.
+        :param delete_original: If True, it deletes the original ragged LD matrix.
+        :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+        and integer quantized data types int8 and int16).
+        :param compressor_name: The name of the compressor or compression algorithm to use with Zarr.
+        :param compression_level: The compression level to use with the compressor (1-9).
+        """
+
+        dtype = np.dtype(dtype)
+
+        # If ragged_zarr is a path, rather than a Zarr Array object, then
+        # open it as a Zarr array object before proceeding:
+        if isinstance(ragged_zarr, str):
+            if osp.isfile(osp.join(ragged_zarr, '.zarray')):
+                ragged_zarr = zarr.open(ragged_zarr)
+            else:
+                raise FileNotFoundError
+
+        num_rows = ragged_zarr.shape[0]
+        chunk_size = ragged_zarr.chunks[0]
+
+        # Create hierarchical storage with zarr groups:
+        store = zarr.DirectoryStore(store_path)
+        z = zarr.group(store=store, overwrite=overwrite)
+
+        # Create a compressor object:
+        compressor = zarr.Blosc(cname=compressor_name, clevel=compression_level)
+
+        # First sub-hierarchy stores the information for the sparse LD matrix:
+        mat = z.create_group('matrix')
+        mat.empty('data', shape=num_rows ** 2, dtype=dtype, compressor=compressor)
+
+        indptr_counts = np.zeros(num_rows, dtype=int)
+
+        # Get the LD boundaries from the Zarr array attributes:
+        ld_boundaries = np.array(ragged_zarr.attrs['LD boundaries'])
+
+        total_len = 0
+
+        for chunk_idx in range(int(np.ceil(num_rows / chunk_size))):
+
+            chunk_start = chunk_idx * chunk_size
+            chunk_end = min((chunk_idx + 1) * chunk_size, num_rows)
+
+            z_chunk = ragged_zarr[chunk_start: chunk_end]
+
+            data = []
+            chunk_len = 0
+
+            for j in range(chunk_start, chunk_end):
+
+                start, end = ld_boundaries[:, j]
+                new_start = (j - start) + 1
+
+                data.append(
+                    z_chunk[j - chunk_start][new_start:]
+                )
+                indptr_counts[j] = end - (j + 1)
+                chunk_len += int(end - (j + 1))
+
+            # Add data + columns indices to zarr array:
+            concat_data = np.concatenate(data)
+
+            if np.issubdtype(dtype, np.integer):
+                mat['data'][total_len:total_len + chunk_len] = quantize(concat_data, int_dtype=dtype)
+            else:
+                mat['data'][total_len:total_len + chunk_len] = concat_data.astype(dtype)
+
+            total_len += chunk_len
+
+        # Get the final indptr by computing cumulative sum:
+        indptr = np.insert(np.cumsum(indptr_counts), 0, 0)
+        # Store indptr in the zarr array:
+        mat.array('indptr', indptr, compressor=compressor)
+
+        # Resize the data and indices arrays:
+        mat['data'].resize(total_len)
+
+        # ============================================================
+        # Transfer the attributes/metadata from the old matrix format:
+
+        ld_mat = cls(z)
+
+        ld_mat.set_metadata('snps', np.array(ragged_zarr.attrs['SNP']))
+        ld_mat.set_metadata('a1', np.array(ragged_zarr.attrs['A1']))
+        ld_mat.set_metadata('a2', np.array(ragged_zarr.attrs['A2']))
+        ld_mat.set_metadata('maf', np.array(ragged_zarr.attrs['MAF']))
+        ld_mat.set_metadata('bp', np.array(ragged_zarr.attrs['BP']))
+        ld_mat.set_metadata('cm', np.array(ragged_zarr.attrs['cM']))
+
+        try:
+            ld_mat.set_metadata('ldscore', np.array(ragged_zarr.attrs['LDScore']))
+        except KeyError:
+            print("Did not find LD scores in old LD matrix format! Skipping...")
+
+        # Set matrix attributes:
+        ld_mat.set_store_attr('Chromosome', ragged_zarr.attrs['Chromosome'])
+        ld_mat.set_store_attr('LD estimator', ragged_zarr.attrs['LD estimator'])
+        ld_mat.set_store_attr('Estimator properties', ragged_zarr.attrs['Estimator properties'])
+        ld_mat.set_store_attr('Sample size', ragged_zarr.attrs['Sample size'])
+
+        if delete_original:
+            from .stats.ld.utils import delete_ld_store
+            delete_ld_store(ragged_zarr)
+
+        return ld_mat
+
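+    # Example (sketch): convert an LD store in the old ragged format (magenpy <= 0.0.12)
+    # to the new CSR-based format (hypothetical paths):
+    #
+    #     ld_mat = LDMatrix.from_ragged_zarr_matrix('old_ld/chr_22.zarr',
+    #                                               store_path='ld/chr_22.zarr')
+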
+    @property
+    def n_snps(self):
+        """
+        :return: The number of variants in the LD matrix. If the matrix is loaded and filtered,
+        we return the number of variants remaining after applying the filter.
+        """
+        if self._mat is not None:
+            return self._mat.shape[0]
+        else:
+            return self.stored_n_snps
+
+    @property
+    def shape(self):
+        """
+
+        !!! seealso "See Also"
+            * [n_snps][magenpy.LDMatrix.LDMatrix.n_snps]
+
+        :return: The shape of the square LD matrix.
+        """
+        return self.n_snps, self.n_snps
+
+    @property
+    def store(self):
+        """
+        :return: The Zarr group store object.
+        """
+        return self._zg.store
+
+    @property
+    def compressor(self):
+        """
+        :return: The `numcodecs` compressor object for the LD data.
+        """
+        return self._zg['matrix/data'].compressor
+
+    @property
+    def zarr_group(self):
+        """
+        :return: The Zarr group object that stores the LD matrix and its metadata.
+        """
+        return self._zg
+
+    @property
+    def chunks(self):
+        """
+        :return: The chunks for the data array of the LD matrix.
+        """
+        return self._zg['matrix/data'].chunks
+
+    @property
+    def chunk_size(self):
+        """
+        :return: The chunk size for the data array of the LD matrix.
+        """
+        return self.chunks[0]
+
+    @property
+    def stored_n_snps(self):
+        """
+        :return: The number of variants stored in the LD matrix (irrespective of any masks / filters).
+        """
+        return self._zg['matrix/indptr'].shape[0] - 1
+
+    @property
+    def stored_dtype(self):
+        """
+        :return: The data type for the stored entries of `data` array of the LD matrix.
+        """
+        return self._zg['matrix/data'].dtype
+
+    @property
+    def stored_shape(self):
+        """
+        :return: The shape of the stored LD matrix (irrespective of any masks / filters).
+        """
+        n_snps = self.stored_n_snps
+        return n_snps, n_snps
+
+    @property
+    def dtype(self):
+        """
+        :return: The data type for the entries of the `data` array of the LD matrix. If the matrix is
+        in memory, return the dtype of the CSR matrix. Otherwise, return the
+        dtype of the entries in the Zarr array.
+        """
+        if self.in_memory:
+            return self.csr_matrix.dtype
+        else:
+            return self.stored_dtype
+
+    @property
+    def chromosome(self):
+        """
+        :return: The chromosome for which this LD matrix was calculated.
+        """
+        return self.get_store_attr('Chromosome')
+
+    @property
+    def ld_estimator(self):
+        """
+        :return: The LD estimator used to compute the LD matrix. Examples include: `block`, `windowed`, `shrinkage`.
+        """
+        return self.get_store_attr('LD estimator')
+
+    @property
+    def estimator_properties(self):
+        """
+        :return: The properties of the LD estimator used to compute the LD matrix.
+        """
+        return self.get_store_attr('Estimator properties')
+
+    @property
+    def sample_size(self):
+        """
+        :return: The sample size used to compute the LD matrix.
+        """
+        return self.get_store_attr('Sample size')
+
+    @property
+    def genome_build(self):
+        """
+        :return: The genome build based on which the base pair coordinates are defined.
+        """
+        return self.get_store_attr('Genome build')
+
+    @property
+    def snps(self):
+        """
+        :return: rsIDs of the variants included in the LD matrix.
+        """
+        return self.get_metadata('snps')
+
+    @property
+    def a1(self):
+        """
+        :return: The alternative alleles of the variants included in the LD matrix.
+        """
+        return self.get_metadata('a1')
+
+    @property
+    def a2(self):
+        """
+        :return: The reference alleles of the variants included in the LD matrix.
+        """
+        return self.get_metadata('a2')
+
+    @property
+    def maf(self):
+        """
+        :return: The minor allele frequency (MAF) of the alternative allele (A1) in the LD matrix.
+        """
+        try:
+            return self.get_metadata('maf')
+        except KeyError:
+            return None
+
+    @property
+    def bp_position(self):
+        """
+        !!! seealso "See Also"
+            * [genome_build][magenpy.LDMatrix.LDMatrix.genome_build]
+
+        :return: The base pair position of each SNP in the LD matrix.
+        """
+        return self.get_metadata('bp')
+
+    @property
+    def cm_position(self):
+        """
+        :return: The centimorgan (cM) position of each variant in the LD matrix.
+        """
+        try:
+            return self.get_metadata('cm')
+        except KeyError:
+            return None
+
+    @property
+    def ld_score(self):
+        """
+        :return: The LD score of each variant in the LD matrix.
+        """
+        try:
+            return self.get_metadata('ldscore')
+        except KeyError:
+
+            ld_score = self.compute_ld_scores()
+
+            if self._mask is None:
+                self.set_metadata('ldscore', ld_score, overwrite=True)
+
+            return ld_score
+
+    @property
+    def ld_boundaries(self):
+        """
+        The LD boundaries associated with each variant.
+        The LD boundaries are defined as the index of the leftmost neighbor
+        (lower boundary) and the rightmost neighbor (upper boundary) of each variant.
+        If the LD matrix is upper triangular, then the boundaries for variant `i` go from `i + 1` to `i + k_i`,
+        where `k_i` is the number of neighbors that SNP `i` is in LD with.
+
+        :return: A matrix of shape `(2, n_snps)` where the first row contains the lower boundaries and the second row
+        contains the upper boundaries.
+
+        """
+
+        indptr = self.indptr
+
+        if self.in_memory and self.is_symmetric:
+
+            # Check that the matrix has canonical format (indices are sorted / no duplicates):
+            assert self.csr_matrix.has_canonical_format
+
+            return np.vstack([self.indices[indptr[:-1]], self.indices[indptr[1:] - 1] + 1]).astype(np.int32)
+
+        else:
+
+            # If the matrix is not in memory, then the format is upper triangular.
+            # Therefore, it goes from diagonal + 1 to the end of the row.
+            left_bound = np.arange(1, len(indptr) - 1)  # The leftmost neighbor of each SNP (diagonal + 1)
+            return np.vstack([left_bound, left_bound + np.diff(indptr[:-1])]).astype(np.int32)
+
+    @property
+    def window_size(self):
+        """
+        !!! seealso "See Also"
+            * [n_neighbors][magenpy.LDMatrix.LDMatrix.n_neighbors]
+
+        !!! note
+            This includes the variant itself if the matrix is in memory and is symmetric.
+
+        :return: The number of variants in the LD window for each SNP.
+
+        """
+        return np.diff(self.indptr)
+
+    @property
+    def n_neighbors(self):
+        """
+        The number of variants in the LD window for each SNP.
+
+        !!! seealso "See Also"
+            * [window_size][magenpy.LDMatrix.LDMatrix.window_size]
+
+        !!! note
+            This includes the variant itself if the matrix is in memory and is symmetric.
+
+        """
+        return self.window_size
+
+    @property
+    def csr_matrix(self):
+        """
+        !!! note
+            If the LD matrix is not in-memory, then it'll be loaded using default settings.
+
+        :return: The in-memory CSR matrix object.
+        """
+        if self._mat is None:
+            self.load()
+        return self._mat
+
+    @property
+    def data(self):
+        """
+        :return: The `data` array of the sparse `CSR` matrix, containing the entries of the LD matrix.
+        """
+        if self.in_memory:
+            return self.csr_matrix.data
+        else:
+            return self._zg['matrix/data']
+
+    @property
+    def indices(self):
+        """
+        :return: The column indices of the non-zero elements of the sparse, CSR representation of the LD matrix.
+        """
+        if self.in_memory:
+            return self.csr_matrix.indices
+        else:
+            ld_bounds = self.ld_boundaries
+
+            from .stats.ld.c_utils import expand_ranges
+
+            return expand_ranges(ld_bounds[0], ld_bounds[1], self.data.shape[0])
+
+    @property
+    def row_indices(self):
+        """
+        :return: The row indices of the non-zero elements of the sparse, CSR representation of the LD matrix.
+        """
+        if self.in_memory:
+            # TODO: Check that this behaves correctly if some entries are zero but not eliminated.
+            return self.csr_matrix.nonzero()[0]
+        else:
+            indptr = self.indptr
+            return np.repeat(np.arange(len(indptr) - 1), np.diff(indptr))
+
+    @property
+    def indptr(self):
+        """
+        :return: The index pointer array (`indptr`) delineating the start and end of the data
+        for each row of the flattened, sparse CSR representation of the LD matrix.
+        """
+        if self.in_memory:
+            return self.csr_matrix.indptr
+        else:
+            return self._zg['matrix/indptr']
+
+    def filter_snps(self, extract_snps=None, extract_file=None):
+        """
+        Filter the LDMatrix to keep a subset of variants. This mainly sets
+        the mask for the LD matrix, which is used to hide/remove some SNPs from the LD matrix,
+        without altering the stored objects on-disk.
+
+        :param extract_snps: A list or array of SNP rsIDs to keep.
+        :param extract_file: A plink-style file containing the SNP rsIDs to keep.
+        """
+
+        assert extract_snps is not None or extract_file is not None
+
+        if extract_snps is None:
+            from .parsers.misc_parsers import read_snp_filter_file
+            extract_snps = read_snp_filter_file(extract_file)
+
+        from .utils.compute_utils import intersect_arrays
+
+        new_mask = intersect_arrays(self.get_metadata('snps', apply_mask=False),
+                                    extract_snps,
+                                    return_index=True)
+
+        self.set_mask(new_mask)
+
+    def get_mask(self):
+        """
+        :return: The mask (a boolean flag array) used to hide/remove some SNPs from the LD matrix.
+        """
+        return self._mask
+
+    def set_mask(self, mask):
+        """
+        Set the mask (a boolean array) to hide/remove some SNPs from the LD matrix.
+        :param mask: An array of indices or boolean mask for SNPs to retain.
+        """
+
+        # If the mask is equivalent to the current mask, return:
+        if np.array_equal(mask, self._mask):
+            return
+
+        # If the mask is boolean, convert to indices (should we?):
+        if mask.dtype == bool:
+            self._mask = np.where(mask)[0]
+        else:
+            self._mask = mask
+
+        # If the data is already in memory, reload:
+        if self.in_memory:
+            self.load(force_reload=True,
+                      return_symmetric=self.is_symmetric,
+                      fill_diag=self.is_symmetric)
+
+    def to_snp_table(self, col_subset=None):
+        """
+        :param col_subset: The subset of columns to add to the table. If None, it returns
+        all available columns.
+
+        :return: A `pandas` dataframe of the SNP attributes and metadata for variants
+        included in the LD matrix.
+        """
+
+        col_subset = col_subset or ['CHR', 'SNP', 'POS', 'A1', 'A2', 'MAF', 'LDScore']
+
+        table = pd.DataFrame({'SNP': self.snps})
+
+        for col in col_subset:
+            if col == 'CHR':
+                table['CHR'] = self.chromosome
+            if col == 'POS':
+                table['POS'] = self.bp_position
+            if col == 'cM':
+                table['cM'] = self.cm_position
+            if col == 'A1':
+                table['A1'] = self.a1
+            if col == 'A2':
+                table['A2'] = self.a2
+            if col == 'MAF':
+                table['MAF'] = self.maf
+            if col == 'LDScore':
+                table['LDScore'] = self.ld_score
+            if col == 'WindowSize':
+                table['WindowSize'] = self.window_size
+
+        return table[list(col_subset)]
+
+    def compute_ld_scores(self,
+                          annotation_matrix=None,
+                          corrected=True,
+                          chunk_size=10_000):
+        """
+
+        Computes the LD scores for variants in the LD matrix. LD scores are defined
+        as the sum of the squared pairwise Pearson correlation coefficients between the focal SNP and
+        all of its neighboring SNPs. See Bulik-Sullivan et al. (2015) for details.
+
+        :param annotation_matrix: A matrix of annotations for each variant for which to aggregate the LD scores.
+        :param corrected: Use the sample-size corrected estimator for the squared Pearson correlation coefficient.
+            See Bulik-Sullivan et al. (2015).
+        :param chunk_size: Specify the number of rows (i.e. SNPs) to compute the LD scores for simultaneously.
+            Smaller chunk sizes should require less memory resources. If set to None, we compute LD scores
+            for all SNPs in the LD matrix in one go.
+
+        :return: An array of LD scores for each variant in the LD matrix.
+        """
+
+        if chunk_size is None:
+            chunk_size = self.stored_n_snps
+
+        if annotation_matrix is None:
+            annotation_matrix = np.ones((self.n_snps, 1), dtype=np.float32)
+
+        ld_scores = np.zeros((self.n_snps, annotation_matrix.shape[1]))
+
+        for chunk_idx in range(int(np.ceil(self.stored_n_snps / chunk_size))):
+
+            start_row = chunk_idx*chunk_size
+            end_row = (chunk_idx + 1)*chunk_size
+
+            csr_mat = self.load_rows(start_row=start_row,
+                                     end_row=end_row,
+                                     return_symmetric=False,
+                                     fill_diag=False,
+                                     dtype=np.float32)
+
+            # If a mask is set, apply it to the matrix:
+            if self._mask is not None:
+                csr_mat = csr_mat[self._mask, :][:, self._mask]
+
+            mat_sq = csr_mat.power(2)
+
+            if corrected:
+                mat_sq.data -= (1. - mat_sq.data) / (self.sample_size - 2)
+
+            ld_scores += mat_sq.dot(annotation_matrix)
+            ld_scores += mat_sq.T.dot(annotation_matrix)
+
+        # Add the contribution of the diagonal:
+        ld_scores += identity(self.n_snps, dtype=np.float32).dot(annotation_matrix)
+
+        # Set floating type to float32:
+        ld_scores = ld_scores.astype(np.float32)
+
+        if ld_scores.shape[1] == 1:
+            return ld_scores.flatten()
+        else:
+            return ld_scores
+
+    def multiply(self, vec):
+        """
+        Multiply the LD matrix with an input vector `vec`.
+
+        !!! seealso "See Also"
+            * [dot][magenpy.LDMatrix.LDMatrix.dot]
+
+        :return: The product of the LD matrix with the input vector.
+        """
+        return self.csr_matrix.dot(vec)
+
+    def dot(self, vec):
+        """
+        Multiply the LD matrix with an input vector `vec`.
+
+        !!! seealso "See Also"
+            * [multiply][magenpy.LDMatrix.LDMatrix.multiply]
+
+        :return: The product of the LD matrix with the input vector.
+
+        """
+        return self.multiply(vec)
+
+    def estimate_uncompressed_size(self, dtype=None):
+        """
+        Provide an estimate of the size of the uncompressed LD matrix in megabytes (MB).
+        This is only a rough estimate. Depending on how the LD matrix is loaded, the actual size
+        may be much larger than this estimate.
+
+        :param dtype: The data type assumed for the entries of the LD matrix
+            (defaults to the stored data type).
+
+        :return: The estimated size of the uncompressed LD matrix in MB.
+
+        """
+
+        if dtype is None:
+            dtype = self.stored_dtype
+
+        return 2.*self._zg['matrix/data'].shape[0]*np.dtype(dtype).itemsize / 1024 ** 2
+
+    def get_metadata(self, key, apply_mask=True):
+        """
+        Get the metadata associated with each variant in the LD matrix.
+        :param key: The key for the metadata item.
+        :param apply_mask: If True, apply the mask (e.g. filter) to the metadata.
+
+        :return: The metadata item for each variant in the LD matrix.
+        :raises KeyError: if the metadata item is not set.
+        """
+        try:
+            if self._mask is not None and apply_mask:
+                return self._zg[f'metadata/{key}'][self._mask]
+            else:
+                return self._zg[f'metadata/{key}'][:]
+        except KeyError:
+            raise KeyError(f"LD matrix metadata item {key} is not set!")
+
+    def get_store_attr(self, attr):
+        """
+        Get the attribute or metadata `attr` associated with the LD matrix.
+        :param attr: The attribute name.
+
+        :return: The value for the attribute.
+        :raises KeyError: if the attribute is not set.
+        """
+        try:
+            return self._zg.attrs[attr]
+        except KeyError:
+            print(f"Warning: Attribute '{attr}' is not set!")
+            return None
+
+    def set_store_attr(self, attr, value):
+        """
+        Set the attribute `attr` associated with the LD matrix. This is used
+        to set high-level information, such as information about the sample from which
+        the matrix was computed, the LD estimator used, its properties, etc.
+
+        :param attr: The attribute name.
+        :param value: The value for the attribute.
+        """
+
+        self._zg.attrs[attr] = value
+
+    def set_metadata(self, key, value, overwrite=False):
+        """
+        Set the metadata field associated with variants in the LD matrix.
+        :param key: The key for the metadata item.
+        :param value: The value for the metadata item (an array with the same length as the number of variants).
+        :param overwrite: If True, overwrite the metadata item if it already exists.
+        """
+
+        if 'metadata' not in list(self._zg.group_keys()):
+            meta = self._zg.create_group('metadata')
+        else:
+            meta = self._zg['metadata']
+
+        value = np.array(value)
+
+        if np.issubdtype(value.dtype, np.floating):
+            dtype = np.float32
+        elif np.issubdtype(value.dtype, np.integer):
+            dtype = np.int32
+        else:
+            dtype = str
+
+        meta.array(key, value, overwrite=overwrite, dtype=dtype, compressor=self.compressor)
+
+    def update_rows_inplace(self, new_csr, start_row=None, end_row=None):
+        """
+        A utility function to perform partial updates to a subset of rows in the
+        LD matrix. The function takes a new CSR matrix and, optionally, a start
+        and end row delimiting the chunk of the LD matrix to update with the `new_csr`.
+
+        !!! note
+            Current implementation assumes that the update does not change the sparsity
+            structure of the original matrix. Updating the matrix with new sparsity structure
+            is a harder problem that we will try to tackle later on.
+
+        !!! note
+            Current implementation assumes `new_csr` is upper triangular.
+
+        :param new_csr: A sparse CSR matrix (`scipy.sparse.csr_matrix`) where the column dimension
+        matches the column dimension of the LD matrix.
+        :param start_row: The start row for the chunk to update.
+        :param end_row: The end row for the chunk to update.
+
+        :raises AssertionError: if the column dimension of `new_csr` does not match the column dimension
+        of the LD matrix.
+        """
+
+        assert new_csr.shape[1] == self.stored_n_snps
+
+        start_row = start_row or 0
+        end_row = end_row or self.stored_n_snps
+
+        # Sanity checking:
+        assert start_row >= 0
+        assert end_row <= self.stored_n_snps
+
+        indptr = self._zg['matrix/indptr'][:]
+
+        data_start = indptr[start_row]
+        data_end = indptr[end_row]
+
+        # TODO: Check that this covers most cases and would not result in unexpected behavior
+        if np.issubdtype(self.stored_dtype, np.integer) and np.issubdtype(new_csr.dtype, np.floating):
+            self._zg['matrix/data'][data_start:data_end] = quantize(new_csr.data, int_dtype=self.stored_dtype)
+        else:
+            self._zg['matrix/data'][data_start:data_end] = new_csr.data.astype(self.stored_dtype)
+
+    def low_memory_load(self, dtype=None):
+        """
+        A utility method to load the LD matrix in low-memory mode.
+        The method will load the entries of the upper triangular portion of the matrix,
+        perform filtering based on the mask (if set), and return the filtered data
+        and index pointer (`indptr`) arrays.
+
+        This is useful for some applications, such as the `low_memory` version of
+        the `viprs` method, because it avoids reconstructing the `indices` array for the CSR matrix,
+        which can potentially be a very long array of large integers.
+
+        !!! note
+            The method, by construction, does not support loading the full symmetric matrix. If
+            that's the goal, use the `.load()` or `.load_rows()` methods.
+
+        !!! seealso "See Also"
+            * [load_rows][magenpy.LDMatrix.LDMatrix.load_rows]
+            * [load][magenpy.LDMatrix.LDMatrix.load]
+
+        :param dtype: The data type for the entries of the LD matrix.
+
+        :return: A tuple of the data and index pointer arrays for the LD matrix.
+
+        """
+
+        # Determine the final data type for the LD matrix entries
+        # and whether we need to perform dequantization or not depending on
+        # the stored data type and the requested data type.
+
+        if dtype is None:
+            dtype = self.stored_dtype
+            dequantize_data = False
+        else:
+            dtype = np.dtype(dtype)
+            if np.issubdtype(self.stored_dtype, np.integer) and np.issubdtype(dtype, np.floating):
+                dequantize_data = True
+            else:
+                dequantize_data = False
+
+        # Get the index pointer array:
+        indptr = self._zg['matrix/indptr'][:]
+
+        # Filter the index pointer array based on the mask:
+        if self._mask is not None:
+
+            if np.issubdtype(self._mask.dtype, np.integer):
+                mask = np.zeros(self.stored_n_snps, dtype=np.int8)
+                mask[self._mask] = 1
+            else:
+                mask = self._mask
+
+            from .stats.ld.c_utils import filter_ut_csr_matrix_low_memory
+
+            data_mask, indptr = filter_ut_csr_matrix_low_memory(indptr, mask)
+            # Unfortunately, .vindex is very slow in Zarr right now (~order of magnitude)
+            # So for now, we load the entire data array before performing the mask selection:
+            data = self._zg['matrix/data'][:][data_mask]
+        else:
+            data = self._zg['matrix/data'][:]
+
+        if dequantize_data:
+            return dequantize(data, float_dtype=dtype), indptr
+        else:
+            return data.astype(dtype), indptr
+
+    def load_rows(self,
+                  start_row=None,
+                  end_row=None,
+                  return_symmetric=False,
+                  fill_diag=False,
+                  keep_shape=True,
+                  dtype=None):
+        """
+        A utility function to allow for loading a subset of the LD matrix.
+        By specifying `start_row` and `end_row`, the user can process or inspect small
+        blocks of the LD matrix without loading the whole thing into memory.
+
+        TODO: Consider using `low_memory_load` internally to avoid reconstructing the `indices` array.
+
+        !!! note
+            This method does not perform any filtering on the stored data.
+            To access the LD matrix with filtering, use `.load()` or `low_memory_load`.
+
+        !!! seealso "See Also"
+            * [low_memory_load][magenpy.LDMatrix.LDMatrix.low_memory_load]
+            * [load][magenpy.LDMatrix.LDMatrix.load]
+
+        :param start_row: The start row to load to memory.
+        :param end_row: The end row (not inclusive) to load to memory.
+        :param return_symmetric: If True, return a full symmetric representation of the LD matrix.
+        :param fill_diag: If True, fill the diagonal of the LD matrix with ones.
+        :param keep_shape: If True, return the LD matrix with the same shape as the original. Here,
+        entries that are outside the requested start_row:end_row region will be zeroed out.
+        :param dtype: The data type for the entries of the LD matrix.
+
+        :return: The requested sub-matrix of the LD matrix.
+        """
+
+        # Determine the final data type for the LD matrix entries
+        # and whether we need to perform dequantization or not depending on
+        # the stored data type and the requested data type.
+        if dtype is None:
+            dtype = self.stored_dtype
+            dequantize_data = False
+        else:
+            dtype = np.dtype(dtype)
+            if np.issubdtype(self.stored_dtype, np.integer) and np.issubdtype(dtype, np.floating):
+                dequantize_data = True
+            else:
+                dequantize_data = False
+
+        # Sanity checking + forming the dimensions of the
+        # requested sub-matrix:
+        n_snps = self.stored_n_snps
+
+        start_row = start_row or 0
+        end_row = end_row or n_snps
+
+        # Sanity checking:
+        assert start_row >= 0
+        end_row = min(end_row, n_snps)
+
+        # Load the index pointer from disk:
+        indptr = self._zg['matrix/indptr'][:]
+
+        # Determine the start and end positions in the data matrix
+        # based on the requested start and end rows:
+        data_start = indptr[start_row]
+        data_end = indptr[end_row]
+
+        # If the user is requesting a subset of the matrix, then we need to adjust
+        # the index pointer accordingly:
+        if start_row > 0 or end_row < n_snps:
+            # Zero out all index pointers before `start_row`:
+            indptr = np.clip(indptr - data_start, a_min=0, a_max=None)
+            # Adjust all index pointers after `end_row`:
+            indptr[end_row+1:] = (data_end - data_start)
+
+        # Extract the data for the requested rows:
+        csr_data = self._zg['matrix/data'][data_start:data_end]
+
+        # If we need to de-quantize the data, do it now:
+        if dequantize_data:
+            csr_data = dequantize(csr_data, float_dtype=dtype)
+
+        # Construct a CSR matrix from the loaded data, updated indptr, and indices:
+
+        # Get the indices array:
+        if self.in_memory:
+            # If the matrix (or a version of it) is already loaded,
+            # then set the `in_memory` flag to False before fetching the indices.
+            self.in_memory = False
+            indices = self.indices
+            self.in_memory = True
+        else:
+            indices = self.indices
+
+        mat = csr_matrix(
+            (
+                csr_data,
+                indices[data_start:data_end],
+                indptr
+            ),
+            shape=(n_snps, n_snps),
+            dtype=dtype
+        )
+
+        # Determine the "invalid" value for the purposes of reconstructing
+        # the symmetric matrix:
+        if np.issubdtype(dtype, np.integer):
+            # For integers, we don't use the minimum value during quantization
+            # because we would like to have the zero point at exactly zero. So,
+            # we can use this value as our alternative to `nan`.
+            invalid_value = np.iinfo(dtype).min
+            identity_val = np.iinfo(dtype).max
+        else:
+            invalid_value = np.nan
+            identity_val = 1
+
+        if return_symmetric:
+
+            # First, replace explicit zeros with invalid value (this is a hack to prevent scipy
+            # from eliminating those zeros when making the matrix symmetric):
+            mat.data[mat.data == 0] = invalid_value
+
+            # Add the matrix transpose to make it symmetric:
+            mat = (mat + mat.T).astype(dtype)
+
+            # If the user requested filling the diagonals, do it here:
+            if fill_diag:
+                diag_vals = np.concatenate([np.zeros(start_row, dtype=dtype),
+                                            identity_val*np.ones(end_row - start_row, dtype=dtype),
+                                            np.zeros(n_snps - end_row, dtype=dtype)])
+                mat += diags(diag_vals, dtype=dtype, shape=mat.shape)
+
+            # Replace the invalid values with zeros again:
+            if np.isnan(invalid_value):
+                mat.data[np.isnan(mat.data)] = 0
+            else:
+                mat.data[mat.data == invalid_value] = 0
+
+            return mat
+        elif fill_diag:
+            diag_vals = np.concatenate([np.zeros(start_row, dtype=dtype),
+                                        identity_val*np.ones(end_row - start_row, dtype=dtype),
+                                        np.zeros(n_snps - end_row, dtype=dtype)])
+            mat += diags(diag_vals, dtype=dtype, shape=mat.shape)
+
+        # If the shape remains the same, return the matrix as is.
+        # Otherwise, return the requested sub-matrix:
+        if keep_shape:
+            return mat
+        else:
+            return mat[start_row:end_row, :]
+
+    def load(self,
+             force_reload=False,
+             return_symmetric=True,
+             fill_diag=True,
+             dtype=None):
+
+        """
+        Load the LD matrix from its on-disk storage (in the form of Zarr arrays) into memory,
+        in the form of a sparse CSR matrix.
+
+        !!! seealso "See Also"
+            * [low_memory_load][magenpy.LDMatrix.LDMatrix.low_memory_load]
+            * [load_rows][magenpy.LDMatrix.LDMatrix.load_rows]
+
+        :param force_reload: If True, it will reload the data even if it is already in memory.
+        :param return_symmetric: If True, return a full symmetric representation of the LD matrix.
+        :param fill_diag: If True, fill the diagonal elements of the LD matrix with ones.
+        :param dtype: The data type for the entries of the LD matrix.
+
+        :return: The LD matrix as a sparse CSR matrix.
+        """
+
+        if dtype is not None:
+            dtype = np.dtype(dtype)
+
+        if self.in_memory:
+            # If the LD matrix is already in memory:
+
+            if (return_symmetric == self.is_symmetric) and not force_reload:
+                # If the requested symmetry is the same as the one already loaded,
+                # and the user asked not to force a reload, then do nothing.
+
+                # If the currently loaded LD matrix has float entries and the user wants
+                # the return type to be another floating point, then just cast and return.
+                # Otherwise, we have to reload the matrix:
+                if np.issubdtype(self._mat.data.dtype, np.floating) and np.issubdtype(dtype, np.floating):
+                    self._mat.data = self._mat.data.astype(dtype)
+                    return
+                elif self._mat.data.dtype == dtype:
+                    return
+
+        # If we are re-loading the matrix, make sure to release the current one:
+        self.release()
+
+        self._mat = self.load_rows(return_symmetric=return_symmetric,
+                                   fill_diag=fill_diag,
+                                   dtype=dtype)
+
+        # If a mask is set, apply it:
+        if self._mask is not None:
+            self._mat = self._mat[self._mask, :][:, self._mask]
+
+        # Update the flags:
+        self.in_memory = True
+        self.is_symmetric = return_symmetric
+
+    def release(self):
+        """
+        Release the LD data from memory.
+        """
+        self._mat = None
+        self.in_memory = False
+        self.is_symmetric = False
+        self.index = 0
+
+    def get_row(self, index, return_indices=False):
+        """
+        Extract a single row from the LD matrix.
+
+        :param index: The index of the row to extract.
+        :param return_indices: If True, return the indices of the non-zero elements of that row.
+
+        :return: The requested row of the LD matrix.
+        """
+
+        if self.in_memory:
+            row = self.csr_matrix.getrow(index)
+            if return_indices:
+                return row.data, row.indices
+            else:
+                return row.data
+        else:
+            indptr = self.indptr[:]
+            start_idx, end_idx = indptr[index], indptr[index + 1]
+            if return_indices:
+                return self.data[start_idx:end_idx], np.arange(index + 1,
+                                                               index + 1 + (indptr[index + 1] - indptr[index]))
+            else:
+                return self.data[start_idx:end_idx]
+
+    def validate_ld_matrix(self):
+        """
+        Checks that the `LDMatrix` object has correct structure and
+        checks its contents for validity.
+
+        Specifically, we check that:
+        * The dimensions of the matrix and its associated attributes match.
+        * The masking is working properly.
+
+        :return: True if the matrix has the correct structure.
+        :raises ValueError: if the matrix is not valid.
+        """
+
+        class_attrs = ['snps', 'a1', 'a2', 'maf', 'bp_position', 'cm_position', 'ld_score']
+
+        for attr in class_attrs:
+            attribute = getattr(self, attr)
+            if attribute is None:
+                continue
+            if len(attribute) != len(self):
+                raise ValueError(f"Invalid LD Matrix: Dimensions for attribute {attr} are not aligned!")
+
+        # TODO: Add other sanity checks here?
+
+        return True
+
+    def __getstate__(self):
+        return self.store.path, self.in_memory, self.is_symmetric, self._mask
+
+    def __setstate__(self, state):
+
+        path, in_mem, is_symmetric, mask = state
+
+        self._zg = zarr.open_group(path, mode='r')
+        self.in_memory = in_mem
+        self.is_symmetric = is_symmetric
+        self._mat = None
+        self.index = 0
+        self._mask = None
+
+        if mask is not None:
+            self.set_mask(mask)
+
+        if in_mem:
+            self.load(return_symmetric=is_symmetric, fill_diag=is_symmetric)
+
+    def __len__(self):
+        return self.n_snps
+
+    def __getitem__(self, index):
+        return self.get_row(index)
+
+    def __iter__(self):
+        """
+        TODO: Add a flag to allow for chunked iterator, with limited memory footprint.
+        """
+        self.index = 0
+        self.load(return_symmetric=self.is_symmetric)
+        return self
+
+    def __next__(self):
+
+        if self.index == len(self):
+            self.index = 0
+            raise StopIteration
+
+        next_item = self.get_row(self.index)
+        self.index += 1
+
+        return next_item
+
+
+Properties:
+
+| Property | Description |
+| --- | --- |
+| `a1` | The alternative alleles of the variants included in the LD matrix. |
+| `a2` | The reference alleles of the variants included in the LD matrix. |
+| `bp_position` | The base pair position of each SNP in the LD matrix (see also: `genome_build`). |
+| `chromosome` | The chromosome for which this LD matrix was calculated. |
+| `chunk_size` | The chunk size for the data array of the LD matrix. |
+| `chunks` | The chunks for the data array of the LD matrix. |
+| `cm_position` | The centimorgan (cM) position of each variant in the LD matrix. |
+| `compressor` | The numcodecs compressor object for the LD data. |
+| `csr_matrix` | The in-memory CSR matrix object. If the LD matrix is not in memory, it is loaded using default settings. |
+| `data` | The `data` array of the sparse CSR matrix, containing the entries of the LD matrix. |
+| `dtype` | The data type for the entries of the data array of the LD matrix: the dtype of the CSR matrix if the matrix is in memory, otherwise the dtype of the entries in the Zarr array. |
+| `estimator_properties` | The properties of the LD estimator used to compute the LD matrix. |
+| `genome_build` | The genome build based on which the base pair coordinates are defined. |
+| `indices` | The column indices of the non-zero elements of the sparse, CSR representation of the LD matrix. |
+| `indptr` | The index pointer array (`indptr`) delineating the start and end of the data for each row of the flattened, sparse CSR representation of the LD matrix. |
+| `ld_boundaries` | The LD boundaries associated with each variant, i.e. the indices of the leftmost neighbor (lower boundary) and the rightmost neighbor (upper boundary) of each variant; a matrix of shape `(2, n_snps)` with lower boundaries in the first row and upper boundaries in the second. |
+| `ld_estimator` | The LD estimator used to compute the LD matrix. Examples include: block, windowed, shrinkage. |
+| `ld_score` | The LD score of each variant in the LD matrix. |
+| `maf` | The minor allele frequency (MAF) of the alternative allele (A1) in the LD matrix. |
+| `n_neighbors` | The number of variants in the LD window for each SNP; includes the variant itself if the matrix is in memory and is symmetric (see also: `window_size`). |
+| `n_snps` | The number of variants in the LD matrix. If the matrix is loaded and filtered, the number of variants remaining after applying the filter. |
+| `row_indices` | The row indices of the non-zero elements of the sparse, CSR representation of the LD matrix. |
+| `sample_size` | The sample size used to compute the LD matrix. |
+| `shape` | The shape of the square LD matrix. |
+| `snps` | rsIDs of the variants included in the LD matrix. |
+| `store` | The Zarr group store object. |
+| `stored_dtype` | The data type for the stored entries of the data array of the LD matrix. |
+| `stored_n_snps` | The number of variants stored in the LD matrix (irrespective of any masks / filters). |
+| `stored_shape` | The shape of the stored LD matrix (irrespective of any masks / filters). |
+| `window_size` | The number of variants in the LD window for each SNP; includes the variant itself if the matrix is in memory and is symmetric (see also: `n_neighbors`). |
+| `zarr_group` | The Zarr group object that stores the LD matrix and its metadata. |
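+
+Before the method reference below, a minimal sketch of inspecting these properties, assuming
+an LD store was previously written to the hypothetical path `output/ld/chr_22` and that
+`LDMatrix` is importable from the package root:
+
+    from magenpy import LDMatrix
+
+    ld_mat = LDMatrix.from_path("output/ld/chr_22")
+
+    print(ld_mat.n_snps)           # number of variants (after any masking)
+    print(ld_mat.ld_estimator)     # e.g. 'windowed'
+    print(ld_mat.sample_size)      # sample size used to compute the matrix
+    bounds = ld_mat.ld_boundaries  # (2, n_snps) array of per-variant neighbor ranges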
+
+Methods:
+
+__init__(zarr_group, symmetric=False)
+
+Initialize an `LDMatrix` object from a Zarr group store.
+
+Parameters:
+
+| Name | Description | Default |
+| --- | --- | --- |
+| `zarr_group` | The Zarr group object that stores the LD matrix. | required |
+| `symmetric` | A boolean flag indicating whether to represent the LD matrix as symmetric. | `False` |
+
+Source code in magenpy/LDMatrix.py:
+
+def __init__(self, zarr_group, symmetric=False):
+    """
+    Initialize an `LDMatrix` object from a Zarr group store.
+
+    :param zarr_group: The Zarr group object that stores the LD matrix.
+    :param symmetric: A boolean flag indicating whether to represent the LD matrix as symmetric.
+    """
+
+    # Checking the input for correct formatting:
+    # First, it has to be a Zarr group:
+    assert isinstance(zarr_group, zarr.hierarchy.Group)
+    # Second, it has to have a group called `matrix`:
+    assert 'matrix' in list(zarr_group.group_keys())
+
+    # Third, all the sparse array keys must be present:
+    arr_keys = list(zarr_group['matrix'].array_keys())
+    assert all([arr in arr_keys
+                for arr in ('data', 'indptr')])
+
+    self._zg = zarr_group
+
+    self._mat = None
+    self.in_memory = False
+    self.is_symmetric = symmetric
+    self.index = 0
+
+    self._mask = None
+
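+
+As a usage sketch for the constructor (assuming an existing store at the hypothetical path
+`output/ld/chr_22`, and that `LDMatrix` is importable from the package root):
+
+    import zarr
+    from magenpy import LDMatrix
+
+    # Open the Zarr group backing the LD matrix and wrap it:
+    z = zarr.open_group("output/ld/chr_22", mode="r")
+    ld_mat = LDMatrix(z, symmetric=False)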
+
+__iter__()
+
+TODO: Add a flag to allow for chunked iterator, with limited memory footprint.
+
+Source code in magenpy/LDMatrix.py:
+
+def __iter__(self):
+    """
+    TODO: Add a flag to allow for chunked iterator, with limited memory footprint.
+    """
+    self.index = 0
+    self.load(return_symmetric=self.is_symmetric)
+    return self
+
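+
+Iterating over the object loads the matrix and yields the non-zero entries of one row at a
+time (via `get_row`); a minimal sketch, assuming `ld_mat` was constructed as above:
+
+    for row_data in ld_mat:
+        # row_data is a 1D numpy array with the non-zero LD values of this row
+        pass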
+
+compute_ld_scores(annotation_matrix=None, corrected=True, chunk_size=10000)
+
+Computes the LD scores for variants in the LD matrix. LD scores are defined as the sum of the
+squared pairwise Pearson correlation coefficients between the focal SNP and all of its
+neighboring SNPs. See Bulik-Sullivan et al. (2015) for details.
+
+Parameters:
+
+| Name | Description | Default |
+| --- | --- | --- |
+| `annotation_matrix` | A matrix of annotations for each variant for which to aggregate the LD scores. | `None` |
+| `corrected` | Use the sample-size corrected estimator for the squared Pearson correlation coefficient. See Bulik-Sullivan et al. (2015). | `True` |
+| `chunk_size` | The number of rows (i.e. SNPs) to compute the LD scores for simultaneously. Smaller chunk sizes should require less memory. If set to None, LD scores are computed for all SNPs in one go. | `10000` |
+
+Returns: An array of LD scores for each variant in the LD matrix.
+
+Source code in magenpy/LDMatrix.py:
+
+def compute_ld_scores(self,
+                      annotation_matrix=None,
+                      corrected=True,
+                      chunk_size=10_000):
+    """
+
+    Computes the LD scores for variants in the LD matrix. LD scores are defined
+    as the sum of the squared pairwise Pearson correlation coefficients between the focal SNP and
+    all of its neighboring SNPs. See Bulik-Sullivan et al. (2015) for details.
+
+    :param annotation_matrix: A matrix of annotations for each variant for which to aggregate the LD scores.
+    :param corrected: Use the sample-size corrected estimator for the squared Pearson correlation coefficient.
+        See Bulik-Sullivan et al. (2015).
+    :param chunk_size: Specify the number of rows (i.e. SNPs) to compute the LD scores for simultaneously.
+        Smaller chunk sizes should require less memory resources. If set to None, we compute LD scores
+        for all SNPs in the LD matrix in one go.
+
+    :return: An array of LD scores for each variant in the LD matrix.
+    """
+
+    if chunk_size is None:
+        chunk_size = self.stored_n_snps
+
+    if annotation_matrix is None:
+        annotation_matrix = np.ones((self.n_snps, 1), dtype=np.float32)
+
+    ld_scores = np.zeros((self.n_snps, annotation_matrix.shape[1]))
+
+    for chunk_idx in range(int(np.ceil(self.stored_n_snps / chunk_size))):
+
+        start_row = chunk_idx*chunk_size
+        end_row = (chunk_idx + 1)*chunk_size
+
+        csr_mat = self.load_rows(start_row=start_row,
+                                 end_row=end_row,
+                                 return_symmetric=False,
+                                 fill_diag=False,
+                                 dtype=np.float32)
+
+        # If a mask is set, apply it to the matrix:
+        if self._mask is not None:
+            csr_mat = csr_mat[self._mask, :][:, self._mask]
+
+        mat_sq = csr_mat.power(2)
+
+        if corrected:
+            mat_sq.data -= (1. - mat_sq.data) / (self.sample_size - 2)
+
+        ld_scores += mat_sq.dot(annotation_matrix)
+        ld_scores += mat_sq.T.dot(annotation_matrix)
+
+    # Add the contribution of the diagonal:
+    ld_scores += identity(self.n_snps, dtype=np.float32).dot(annotation_matrix)
+
+    # Set floating type to float32:
+    ld_scores = ld_scores.astype(np.float32)
+
+    if ld_scores.shape[1] == 1:
+        return ld_scores.flatten()
+    else:
+        return ld_scores
+
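+
+A usage sketch (the annotation matrix below is randomly generated purely for illustration):
+
+    import numpy as np
+
+    # Univariate LD scores, computed in chunks of 5,000 variants:
+    scores = ld_mat.compute_ld_scores(chunk_size=5_000)
+
+    # Per-annotation LD scores with a random binary annotation matrix:
+    annot = np.random.randint(0, 2, size=(ld_mat.n_snps, 3)).astype(np.float32)
+    annot_scores = ld_mat.compute_ld_scores(annotation_matrix=annot)  # shape: (n_snps, 3)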
+
+dot(vec)
+
+Multiply the LD matrix with an input vector `vec`.
+
+See Also: `multiply`
+
+Returns: The product of the LD matrix with the input vector.
+
+Source code in magenpy/LDMatrix.py:
+
+def dot(self, vec):
+    """
+    Multiply the LD matrix with an input vector `vec`.
+
+    !!! seealso "See Also"
+        * [multiply][magenpy.LDMatrix.LDMatrix.multiply]
+
+    :return: The product of the LD matrix with the input vector.
+
+    """
+    return self.multiply(vec)
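+
+For example, multiplying the LD matrix with a random vector of effect sizes (illustrative only):
+
+    import numpy as np
+
+    beta = np.random.normal(size=ld_mat.n_snps)
+    ld_beta = ld_mat.dot(beta)  # equivalent to ld_mat.multiply(beta)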
+
+estimate_uncompressed_size(dtype=None)
+
+Provide an estimate of the size of the uncompressed LD matrix in megabytes (MB). This is only
+a rough estimate. Depending on how the LD matrix is loaded, the actual size may be much larger
+than this estimate.
+
+Returns: The estimated size of the uncompressed LD matrix in MB.
+
+Source code in magenpy/LDMatrix.py:
+
+def estimate_uncompressed_size(self, dtype=None):
+    """
+    Provide an estimate of the size of the uncompressed LD matrix in megabytes (MB).
+    This is only a rough estimate. Depending on how the LD matrix is loaded, the actual size
+    may be much larger than this estimate.
+
+    :param dtype: The data type assumed for the entries of the LD matrix
+        (defaults to the stored data type).
+
+    :return: The estimated size of the uncompressed LD matrix in MB.
+
+    """
+
+    if dtype is None:
+        dtype = self.stored_dtype
+
+    return 2.*self._zg['matrix/data'].shape[0]*np.dtype(dtype).itemsize / 1024 ** 2
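+
+This can be used to guard against loading very large matrices, e.g. (with a hypothetical
+memory budget of 8GB):
+
+    if ld_mat.estimate_uncompressed_size(dtype='float32') < 8 * 1024:
+        ld_mat.load(dtype='float32')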
+
+filter_snps(extract_snps=None, extract_file=None)
+
+Filter the LDMatrix to keep a subset of variants. This mainly sets the mask for the LD matrix,
+which is used to hide/remove some SNPs from the LD matrix, without altering the stored objects
+on-disk.
+
+Parameters:
+
+| Name | Description | Default |
+| --- | --- | --- |
+| `extract_snps` | A list or array of SNP rsIDs to keep. | `None` |
+| `extract_file` | A plink-style file containing the SNP rsIDs to keep. | `None` |
+
+Source code in magenpy/LDMatrix.py:
+
+def filter_snps(self, extract_snps=None, extract_file=None):
+    """
+    Filter the LDMatrix to keep a subset of variants. This mainly sets
+    the mask for the LD matrix, which is used to hide/remove some SNPs from the LD matrix,
+    without altering the stored objects on-disk.
+
+    :param extract_snps: A list or array of SNP rsIDs to keep.
+    :param extract_file: A plink-style file containing the SNP rsIDs to keep.
+    """
+
+    assert extract_snps is not None or extract_file is not None
+
+    if extract_snps is None:
+        from .parsers.misc_parsers import read_snp_filter_file
+        extract_snps = read_snp_filter_file(extract_file)
+
+    from .utils.compute_utils import intersect_arrays
+
+    new_mask = intersect_arrays(self.get_metadata('snps', apply_mask=False),
+                                extract_snps,
+                                return_index=True)
+
+    self.set_mask(new_mask)
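+
+A sketch of filtering to a subset of variants (the rsIDs here are placeholders):
+
+    ld_mat.filter_snps(extract_snps=['rs1234', 'rs5678'])
+    print(ld_mat.n_snps)  # reflects the mask; the on-disk data is unchanged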
+
+from_csr(csr_mat, store_path, overwrite=False, dtype='int16', compressor_name='lz4', compression_level=5) (classmethod)
+
+Initialize an LDMatrix object from a sparse CSR matrix.
+
+Parameters:
+
+| Name | Description | Default |
+| --- | --- | --- |
+| `csr_mat` | The sparse CSR matrix. | required |
+| `store_path` | The path to the Zarr LD store where the data will be stored. | required |
+| `overwrite` | If True, it overwrites the LD store at `store_path`. | `False` |
+| `dtype` | The data type for the entries of the LD matrix (supported data types are float32, float64 and integer quantized data types int8 and int16). | `'int16'` |
+| `compressor_name` | The name of the compressor or compression algorithm to use with Zarr. | `'lz4'` |
+| `compression_level` | The compression level to use with the compressor (1-9). | `5` |
+
+Source code in magenpy/LDMatrix.py:
+
+@classmethod
+def from_csr(cls,
+             csr_mat,
+             store_path,
+             overwrite=False,
+             dtype='int16',
+             compressor_name='lz4',
+             compression_level=5):
+    """
+    Initialize an LDMatrix object from a sparse CSR matrix.
+
+    :param csr_mat: The sparse CSR matrix.
+    :param store_path: The path to the Zarr LD store where the data will be stored.
+    :param overwrite: If True, it overwrites the LD store at `store_path`.
+    :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+    and integer quantized data types int8 and int16).
+    :param compressor_name: The name of the compressor or compression algorithm to use with Zarr.
+    :param compression_level: The compression level to use with the compressor (1-9).
+    """
+
+    dtype = np.dtype(dtype)
+
+    # Get the upper triangular part of the matrix:
+    triu_mat = triu(csr_mat, k=1, format='csr')
+
+    # Check that the non-zeros are contiguous around the diagonal with no gaps.
+    # If there are gaps, eliminate them or raise an error.
+    if np.diff(triu_mat.indices).max() > 1:
+        # TODO: Figure out a way to fix this automatically for the user?
+        raise ValueError("The non-zero entries of the LD matrix are not contiguous around the diagonal.")
+
+    # Create hierarchical storage with zarr groups:
+    store = zarr.DirectoryStore(store_path)
+    z = zarr.group(store=store, overwrite=overwrite)
+
+    # Create a compressor object:
+    compressor = zarr.Blosc(cname=compressor_name, clevel=compression_level)
+
+    # First sub-hierarchy stores the information for the sparse LD matrix:
+    mat = z.create_group('matrix')
+    if np.issubdtype(dtype, np.integer):
+        mat.array('data', quantize(triu_mat.data, int_dtype=dtype), dtype=dtype, compressor=compressor)
+    else:
+        mat.array('data', triu_mat.data.astype(dtype), dtype=dtype, compressor=compressor)
+
+    # Store the index pointer:
+    mat.array('indptr', triu_mat.indptr,
+              dtype=np.int32, compressor=compressor)
+
+    return cls(z)
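+
+A toy sketch of building an LD store from a dense correlation matrix. Note that `from_csr`
+requires the non-zero entries of each row to be contiguous around the diagonal (the store path
+below is hypothetical):
+
+    import numpy as np
+    from scipy.sparse import csr_matrix
+    from magenpy import LDMatrix
+
+    # A small banded matrix with r = 0.3 between adjacent variants:
+    n = 5
+    dense = np.eye(n, dtype=np.float32)
+    for i in range(n - 1):
+        dense[i, i + 1] = dense[i + 1, i] = 0.3
+
+    ld_mat = LDMatrix.from_csr(csr_matrix(dense), store_path="output/toy_ld")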
+
+from_dense_zarr_matrix(dense_zarr, ld_boundaries, store_path, overwrite=False, delete_original=False, dtype='int16', compressor_name='lz4', compression_level=5) (classmethod)
+
+Initialize a new LD matrix object using a Zarr array object. This method is useful for
+converting a dense LD matrix computed using Dask (or other distributed computing software) to
+a sparse or banded one.
+
+Parameters:
+
+| Name | Description | Default |
+| --- | --- | --- |
+| `dense_zarr` | The path to the dense Zarr array object. | required |
+| `ld_boundaries` | The LD boundaries for each SNP in the LD matrix (delineates the indices of the leftmost and rightmost neighbors of each SNP). | required |
+| `store_path` | The path where to store the new LD matrix. | required |
+| `overwrite` | If True, it overwrites the LD store at `store_path`. | `False` |
+| `delete_original` | If True, it deletes the original dense LD matrix. | `False` |
+| `dtype` | The data type for the entries of the LD matrix (supported data types are float32, float64 and integer quantized data types int8 and int16). | `'int16'` |
+| `compressor_name` | The name of the compressor or compression algorithm to use with Zarr. | `'lz4'` |
+| `compression_level` | The compression level to use with the compressor (1-9). | `5` |
+
+Source code in magenpy/LDMatrix.py:
+
+@classmethod
+def from_dense_zarr_matrix(cls,
+                           dense_zarr,
+                           ld_boundaries,
+                           store_path,
+                           overwrite=False,
+                           delete_original=False,
+                           dtype='int16',
+                           compressor_name='lz4',
+                           compression_level=5):
+    """
+     Initialize a new LD matrix object using a Zarr array object. This method is
+     useful for converting a dense LD matrix computed using Dask (or other distributed computing
+     software) to a sparse or banded one.
+
+     :param dense_zarr: The path to the dense Zarr array object.
+     :param ld_boundaries: The LD boundaries for each SNP in the LD matrix (delineates the indices of
+        the leftmost and rightmost neighbors of each SNP).
+     :param store_path: The path where to store the new LD matrix.
+     :param overwrite: If True, it overwrites the LD store at `store_path`.
+     :param delete_original: If True, it deletes the original dense LD matrix.
+     :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+        and integer quantized data types int8 and int16).
+     :param compressor_name: The name of the compressor or compression algorithm to use with Zarr.
+     :param compression_level: The compression level to use with the compressor (1-9).
+    """
+
+    dtype = np.dtype(dtype)
+
+    # If dense_zarr is a path, rather than a Zarr Array object, then
+    # open it as a Zarr array object before proceeding:
+    if isinstance(dense_zarr, str):
+        if osp.isfile(osp.join(dense_zarr, '.zarray')):
+            dense_zarr = zarr.open(dense_zarr)
+        else:
+            raise FileNotFoundError
+
+    # Create hierarchical storage with zarr groups:
+    store = zarr.DirectoryStore(store_path)
+    z = zarr.group(store=store, overwrite=overwrite)
+
+    # Create a compressor object:
+    compressor = zarr.Blosc(cname=compressor_name, clevel=compression_level)
+
+    # First sub-hierarchy stores the information for the sparse LD matrix:
+    mat = z.create_group('matrix')
+    mat.empty('data', shape=dense_zarr.shape[0]**2, dtype=dtype, compressor=compressor)
+
+    num_rows = dense_zarr.shape[0]
+    chunk_size = dense_zarr.chunks[0]
+
+    indptr_counts = np.zeros(num_rows, dtype=int)
+
+    total_len = 0
+
+    for chunk_idx in range(int(np.ceil(num_rows / chunk_size))):
+
+        chunk_start = chunk_idx * chunk_size
+        chunk_end = min((chunk_idx + 1) * chunk_size, num_rows)
+
+        z_chunk = dense_zarr[chunk_start: chunk_end]
+
+        data = []
+
+        chunk_len = 0
+
+        for j in range(chunk_start, chunk_end):
+
+            data.append(
+                z_chunk[j - chunk_start][j + 1:ld_boundaries[1, j]]
+            )
+            indptr_counts[j] = len(data[-1])
+            chunk_len += int(ld_boundaries[1, j] - (j+1))
+
+        # Add data + columns indices to zarr array:
+        concat_data = np.concatenate(data)
+
+        if np.issubdtype(dtype, np.integer):
+            mat['data'][total_len:total_len + chunk_len] = quantize(concat_data, int_dtype=dtype)
+        else:
+            mat['data'][total_len:total_len + chunk_len] = concat_data.astype(dtype)
+
+        total_len += chunk_len
+
+    # Get the final indptr by computing cumulative sum:
+    indptr = np.insert(np.cumsum(indptr_counts), 0, 0)
+    # Store indptr in the zarr array:
+    mat.array('indptr', indptr, compressor=compressor)
+
+    # Resize the data and indices arrays:
+    mat['data'].resize(total_len)
+
+    if delete_original:
+        from .stats.ld.utils import delete_ld_store
+        delete_ld_store(dense_zarr)
+
+    return cls(z)
+
+from_dir(ld_store_path) (classmethod)
+
+Initialize an `LDMatrix` object from a Zarr array store.
+
+See Also: `from_path`
+
+Parameters:
+
+| Name | Description | Default |
+| --- | --- | --- |
+| `ld_store_path` | The path to the Zarr array store on the filesystem. | required |
+
+Source code in magenpy/LDMatrix.py:
+
+@classmethod
+def from_dir(cls, ld_store_path):
+    """
+    Initialize an `LDMatrix` object from a Zarr array store.
+    :param ld_store_path: The path to the Zarr array store on the filesystem.
+
+    !!! seealso "See Also"
+        * [from_path][magenpy.LDMatrix.LDMatrix.from_path]
+    """
+    return cls.from_path(ld_store_path)
+
+from_path(ld_store_path) (classmethod)
+
+Initialize an `LDMatrix` object from a pre-computed Zarr group store.
+
+See Also: `from_dir`
+
+Parameters:
+
+| Name | Description | Default |
+| --- | --- | --- |
+| `ld_store_path` | The path to the Zarr array store on the filesystem. | required |
+
+Source code in magenpy/LDMatrix.py:
+
+@classmethod
+def from_path(cls, ld_store_path):
+    """
+    Initialize an `LDMatrix` object from a pre-computed Zarr group store.
+    :param ld_store_path: The path to the Zarr array store on the filesystem.
+
+    !!! seealso "See Also"
+        * [from_dir][magenpy.LDMatrix.LDMatrix.from_dir]
+
+    """
+
+    for level in range(2):
+        try:
+            ld_group = zarr.open_group(ld_store_path, mode='r')
+            return cls(ld_group)
+        except zarr.hierarchy.GroupNotFoundError as e:
+            if level < 1:
+                ld_store_path = osp.dirname(ld_store_path)
+            else:
+                raise e
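+
+Note that if the supplied path does not point to a valid Zarr group, `from_path` retries its
+parent directory once before raising. A minimal sketch, with a hypothetical store path:
+
+    from magenpy import LDMatrix
+
+    ld_mat = LDMatrix.from_path("output/ld/chr_22")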
+
+from_plink_table(plink_ld_file, snps, store_path, pandas_chunksize=None, overwrite=False, dtype='int16', compressor_name='lz4', compression_level=5) (classmethod)
+
+Construct a Zarr LD matrix using output tables from plink1.9.
+
+Parameters:
+
+| Name | Description | Default |
+| --- | --- | --- |
+| `plink_ld_file` | The path to the plink LD table file. | required |
+| `snps` | An iterable containing the list of SNPs in the LD matrix. | required |
+| `store_path` | The path to the Zarr LD store. | required |
+| `pandas_chunksize` | If the LD table is large, provide a chunk size (i.e. number of rows to process at each step) to keep the memory footprint manageable. | `None` |
+| `overwrite` | If True, it overwrites the LD store at `store_path`. | `False` |
+| `dtype` | The data type for the entries of the LD matrix (supported data types are float32, float64 and integer quantized data types int8 and int16). | `'int16'` |
+| `compressor_name` | The name of the compressor or compression algorithm to use with Zarr. | `'lz4'` |
+| `compression_level` | The compression level to use with the compressor (1-9). | `5` |
+
+Source code in magenpy/LDMatrix.py:
+
+@classmethod
+def from_plink_table(cls,
+                     plink_ld_file,
+                     snps,
+                     store_path,
+                     pandas_chunksize=None,
+                     overwrite=False,
+                     dtype='int16',
+                     compressor_name='lz4',
+                     compression_level=5):
+    """
+    Construct a Zarr LD matrix using output tables from plink1.9.
+    This class method takes the following inputs:
+
+    :param plink_ld_file: The path to the plink LD table file.
+    :param snps: An iterable containing the list of SNPs in the LD matrix.
+    :param store_path: The path to the Zarr LD store.
+    :param pandas_chunksize: If the LD table is large, provide chunk size
+    (i.e. number of rows to process at each step) to keep memory footprint manageable.
+    :param overwrite: If True, it overwrites the LD store at `store_path`.
+    :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+    and integer quantized data types int8 and int16).
+    :param compressor_name: The name of the compressor or compression algorithm to use with Zarr.
+    :param compression_level: The compression level to use with the compressor (1-9).
+    """
+
+    dtype = np.dtype(dtype)
+
+    # Create hierarchical storage with zarr groups:
+    store = zarr.DirectoryStore(store_path)
+    z = zarr.group(store=store, overwrite=overwrite)
+
+    # Create a compressor object:
+    compressor = zarr.Blosc(cname=compressor_name, clevel=compression_level)
+
+    # First sub-hierarchy stores the information for the sparse LD matrix:
+    mat = z.create_group('matrix')
+    mat.empty('data', shape=len(snps)**2, dtype=dtype, compressor=compressor)
+
+    # Create a chunked iterator with pandas:
+    # Chunk size will correspond to the average chunk size for the Zarr array:
+    ld_chunks = pd.read_csv(plink_ld_file,
+                            sep=r'\s+',
+                            usecols=['SNP_A', 'R'],
+                            engine='c',
+                            chunksize=pandas_chunksize,
+                            dtype={'SNP_A': str, 'R': np.float32})
+
+    if pandas_chunksize is None:
+        ld_chunks = [ld_chunks]
+
+    # Create a dictionary mapping SNPs to their indices:
+    snp_dict = dict(zip(snps, np.arange(len(snps))))
+
+    indptr_counts = np.zeros(len(snps), dtype=np.int32)
+
+    total_len = 0
+
+    # For each chunk in the LD file:
+    for ld_chunk in ld_chunks:
+
+        # Create an indexed LD chunk:
+        ld_chunk['row_index'] = ld_chunk['SNP_A'].map(snp_dict)
+
+        # Add LD data to the zarr array:
+        if np.issubdtype(dtype, np.integer):
+            mat['data'][total_len:total_len + len(ld_chunk)] = quantize(ld_chunk['R'].values, int_dtype=dtype)
+        else:
+            mat['data'][total_len:total_len + len(ld_chunk)] = ld_chunk['R'].values.astype(dtype)
+
+        total_len += len(ld_chunk)
+
+        # Group by the row index:
+        grouped_ridx = ld_chunk.groupby('row_index').size()
+
+        # Add the number of entries to indptr_counts:
+        indptr_counts[grouped_ridx.index] += grouped_ridx.values
+
+    # Get the final indptr by computing cumulative sum:
+    indptr = np.insert(np.cumsum(indptr_counts), 0, 0)
+    # Store indptr in the zarr group:
+    mat.array('indptr', indptr, dtype=np.int32, compressor=compressor)
+
+    # Resize the data array:
+    mat['data'].resize(total_len)
+
+    return cls(z)
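A hedged sketch of how this constructor might be called. The file paths and the `snps` list are hypothetical, and the plink table is assumed to contain the `SNP_A` and `R` columns that the parser reads:

```python
import magenpy as mgp

snps = ['rs1001', 'rs1002', 'rs1003']  # hypothetical SNP IDs, in matrix order

ld_mat = mgp.LDMatrix.from_plink_table(
    "chr_22.ld",               # hypothetical plink1.9 LD table (needs SNP_A/R columns)
    snps,
    "output/ld/chr_22",        # where the Zarr group store will be created
    pandas_chunksize=100_000,  # stream the table in chunks to bound memory usage
    dtype='int16'              # store quantized 16-bit integer entries
)
```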

#### from_ragged_zarr_matrix(ragged_zarr, store_path, overwrite=False, delete_original=False, dtype='int16', compressor_name='lz4', compression_level=5) (classmethod)

Initialize a new LD matrix object using a Zarr array object conforming to the old LD matrix format from magenpy v<=0.0.12.

This utility function will also copy some of the stored attributes associated with the matrix in the old format.

Parameters:

| Name | Description | Default |
|------|-------------|---------|
| `ragged_zarr` | The ragged Zarr array object, or the path to it on the filesystem. | *required* |
| `store_path` | The path where the new LD matrix will be stored. | *required* |
| `overwrite` | If True, overwrite the LD store at `store_path`. | `False` |
| `delete_original` | If True, delete the original ragged LD matrix. | `False` |
| `dtype` | The data type for the entries of the LD matrix (supported data types are `float32`, `float64` and the quantized integer data types `int8` and `int16`). | `'int16'` |
| `compressor_name` | The name of the compressor or compression algorithm to use with Zarr. | `'lz4'` |
| `compression_level` | The compression level to use with the compressor (1-9). | `5` |

Source code in `magenpy/LDMatrix.py`:
@classmethod
+def from_ragged_zarr_matrix(cls,
+                            ragged_zarr,
+                            store_path,
+                            overwrite=False,
+                            delete_original=False,
+                            dtype='int16',
+                            compressor_name='lz4',
+                            compression_level=5):
+    """
+    Initialize a new LD matrix object using a Zarr array object
+    conforming to the old LD Matrix format from magenpy v<=0.0.12.
+
+    This utility function will also copy some of the stored attributes
+    associated with the matrix in the old format.
+
+    :param ragged_zarr: The path to the ragged Zarr array object.
+    :param store_path: The path where to store the new LD matrix.
+    :param overwrite: If True, it overwrites the LD store at `store_path`.
+    :param delete_original: If True, it deletes the original ragged LD matrix.
+    :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+    and integer quantized data types int8 and int16).
+    :param compressor_name: The name of the compressor or compression algorithm to use with Zarr.
+    :param compression_level: The compression level to use with the compressor (1-9).
+    """
+
+    dtype = np.dtype(dtype)
+
+    # If ragged_zarr is a path, rather than a Zarr Array object, then
+    # open it as a Zarr array object before proceeding:
+    if isinstance(ragged_zarr, str):
+        if osp.isfile(osp.join(ragged_zarr, '.zarray')):
+            ragged_zarr = zarr.open(ragged_zarr)
+        else:
+            raise FileNotFoundError
+
+    num_rows = ragged_zarr.shape[0]
+    chunk_size = ragged_zarr.chunks[0]
+
+    # Create hierarchical storage with zarr groups:
+    store = zarr.DirectoryStore(store_path)
+    z = zarr.group(store=store, overwrite=overwrite)
+
+    # Create a compressor object:
+    compressor = zarr.Blosc(cname=compressor_name, clevel=compression_level)
+
+    # First sub-hierarchy stores the information for the sparse LD matrix:
+    mat = z.create_group('matrix')
+    mat.empty('data', shape=num_rows ** 2, dtype=dtype, compressor=compressor)
+
+    indptr_counts = np.zeros(num_rows, dtype=int)
+
+    # Get the LD boundaries from the Zarr array attributes:
+    ld_boundaries = np.array(ragged_zarr.attrs['LD boundaries'])
+
+    total_len = 0
+
+    for chunk_idx in range(int(np.ceil(num_rows / chunk_size))):
+
+        chunk_start = chunk_idx * chunk_size
+        chunk_end = min((chunk_idx + 1) * chunk_size, num_rows)
+
+        z_chunk = ragged_zarr[chunk_start: chunk_end]
+
+        data = []
+        chunk_len = 0
+
+        for j in range(chunk_start, chunk_end):
+
+            start, end = ld_boundaries[:, j]
+            new_start = (j - start) + 1
+
+            data.append(
+                z_chunk[j - chunk_start][new_start:]
+            )
+            indptr_counts[j] = end - (j + 1)
+            chunk_len += int(end - (j + 1))
+
+        # Add data + columns indices to zarr array:
+        concat_data = np.concatenate(data)
+
+        if np.issubdtype(dtype, np.integer):
+            mat['data'][total_len:total_len + chunk_len] = quantize(concat_data, int_dtype=dtype)
+        else:
+            mat['data'][total_len:total_len + chunk_len] = concat_data.astype(dtype)
+
+        total_len += chunk_len
+
+    # Get the final indptr by computing cumulative sum:
+    indptr = np.insert(np.cumsum(indptr_counts), 0, 0)
+    # Store indptr in the zarr array:
+    mat.array('indptr', indptr, compressor=compressor)
+
+    # Resize the data and indices arrays:
+    mat['data'].resize(total_len)
+
+    # ============================================================
+    # Transfer the attributes/metadata from the old matrix format:
+
+    ld_mat = cls(z)
+
+    ld_mat.set_metadata('snps', np.array(ragged_zarr.attrs['SNP']))
+    ld_mat.set_metadata('a1', np.array(ragged_zarr.attrs['A1']))
+    ld_mat.set_metadata('a2', np.array(ragged_zarr.attrs['A2']))
+    ld_mat.set_metadata('maf', np.array(ragged_zarr.attrs['MAF']))
+    ld_mat.set_metadata('bp', np.array(ragged_zarr.attrs['BP']))
+    ld_mat.set_metadata('cm', np.array(ragged_zarr.attrs['cM']))
+
+    try:
+        ld_mat.set_metadata('ldscore', np.array(ragged_zarr.attrs['LDScore']))
+    except KeyError:
+        print("Did not find LD scores in old LD matrix format! Skipping...")
+
+    # Set matrix attributes:
+    ld_mat.set_store_attr('Chromosome', ragged_zarr.attrs['Chromosome'])
+    ld_mat.set_store_attr('LD estimator', ragged_zarr.attrs['LD estimator'])
+    ld_mat.set_store_attr('Estimator properties', ragged_zarr.attrs['Estimator properties'])
+    ld_mat.set_store_attr('Sample size', ragged_zarr.attrs['Sample size'])
+
+    if delete_original:
+        from .stats.ld.utils import delete_ld_store
+        delete_ld_store(ragged_zarr)
+
+    return ld_mat

#### get_mask()

Returns: The mask (a boolean flag array) used to hide/remove some SNPs from the LD matrix.

Source code in `magenpy/LDMatrix.py`:
def get_mask(self):
+    """
+    :return: The mask (a boolean flag array) used to hide/remove some SNPs from the LD matrix.
+    """
+    return self._mask

#### get_metadata(key, apply_mask=True)

Get the metadata associated with each variant in the LD matrix.

Parameters:

| Name | Description | Default |
|------|-------------|---------|
| `key` | The key for the metadata item. | *required* |
| `apply_mask` | If True, apply the mask (e.g. filter) to the metadata. | `True` |

Returns: The metadata item for each variant in the LD matrix.

Raises: `KeyError` if the metadata item is not set.

Source code in `magenpy/LDMatrix.py`:
def get_metadata(self, key, apply_mask=True):
+    """
+    Get the metadata associated with each variant in the LD matrix.
+    :param key: The key for the metadata item.
+    :param apply_mask: If True, apply the mask (e.g. filter) to the metadata.
+
+    :return: The metadata item for each variant in the LD matrix.
+    :raises KeyError: if the metadata item is not set.
+    """
+    try:
+        if self._mask is not None and apply_mask:
+            return self._zg[f'metadata/{key}'][self._mask]
+        else:
+            return self._zg[f'metadata/{key}'][:]
+    except KeyError:
+        raise KeyError(f"LD matrix metadata item {key} is not set!")
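A minimal sketch of fetching stored metadata; it assumes `ld_mat` is an initialized `LDMatrix` whose store has the `snps` and `maf` metadata fields set:

```python
# Per-variant metadata stored alongside the matrix (keys assumed to be set):
snps = ld_mat.get_metadata('snps')

# Bypass any active mask and return values for all stored variants:
maf = ld_mat.get_metadata('maf', apply_mask=False)
```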

#### get_row(index, return_indices=False)

Extract a single row from the LD matrix.

Parameters:

| Name | Description | Default |
|------|-------------|---------|
| `index` | The index of the row to extract. | *required* |
| `return_indices` | If True, also return the indices of the non-zero elements of that row. | `False` |

Returns: The requested row of the LD matrix.

Source code in `magenpy/LDMatrix.py`:
def get_row(self, index, return_indices=False):
+    """
+    Extract a single row from the LD matrix.
+
+    :param index: The index of the row to extract.
+    :param return_indices: If True, return the indices of the non-zero elements of that row.
+
+    :return: The requested row of the LD matrix.
+    """
+
+    if self.in_memory:
+        row = self.csr_matrix.getrow(index)
+        if return_indices:
+            return row.data, row.indices
+        else:
+            return row.data
+    else:
+        indptr = self.indptr[:]
+        start_idx, end_idx = indptr[index], indptr[index + 1]
+        if return_indices:
+            return self.data[start_idx:end_idx], np.arange(index + 1,
+                                                           index + 1 + (indptr[index + 1] - indptr[index]))
+        else:
+            return self.data[start_idx:end_idx]
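For example, a sketch of inspecting one row (assuming `ld_mat` is an initialized `LDMatrix`):

```python
# Non-zero entries of row 10:
row_data = ld_mat.get_row(10)

# With return_indices=True, a tuple of (values, column indices) is returned:
row_data, row_cols = ld_mat.get_row(10, return_indices=True)
```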

#### get_store_attr(attr)

Get the attribute or metadata `attr` associated with the LD matrix.

Parameters:

| Name | Description | Default |
|------|-------------|---------|
| `attr` | The attribute name. | *required* |

Returns: The value for the attribute, or `None` (with a printed warning) if the attribute is not set.

Source code in `magenpy/LDMatrix.py`:
def get_store_attr(self, attr):
+    """
+    Get the attribute or metadata `attr` associated with the LD matrix.
+    :param attr: The attribute name.
+
+    :return: The value for the attribute, or None (with a printed warning) if the attribute is not set.
+    """
+    try:
+        return self._zg.attrs[attr]
+    except KeyError:
+        print(f"Warning: Attribute '{attr}' is not set!")
+        return None

#### load(force_reload=False, return_symmetric=True, fill_diag=True, dtype=None)

Load the LD matrix from on-disk storage in the form of Zarr arrays to memory, in the form of sparse CSR matrices.

!!! seealso "See Also"
    * [low_memory_load][magenpy.LDMatrix.LDMatrix.low_memory_load]
    * [load_rows][magenpy.LDMatrix.LDMatrix.load_rows]

Parameters:

| Name | Description | Default |
|------|-------------|---------|
| `force_reload` | If True, reload the data even if it is already in memory. | `False` |
| `return_symmetric` | If True, load a full symmetric representation of the LD matrix. | `True` |
| `fill_diag` | If True, fill the diagonal elements of the LD matrix with ones. | `True` |
| `dtype` | The data type for the entries of the LD matrix. | `None` |

Returns: `None`. The matrix is loaded in place and cached on the object as a sparse CSR matrix.

Source code in `magenpy/LDMatrix.py`:
def load(self,
+         force_reload=False,
+         return_symmetric=True,
+         fill_diag=True,
+         dtype=None):
+
+    """
+    Load the LD matrix from on-disk storage in the form of Zarr arrays to memory,
+    in the form of sparse CSR matrices.
+
+    !!! seealso "See Also"
+        * [low_memory_load][magenpy.LDMatrix.LDMatrix.low_memory_load]
+        * [load_rows][magenpy.LDMatrix.LDMatrix.load_rows]
+
+    :param force_reload: If True, it will reload the data even if it is already in memory.
+    :param return_symmetric: If True, return a full symmetric representation of the LD matrix.
+    :param fill_diag: If True, fill the diagonal elements of the LD matrix with ones.
+    :param dtype: The data type for the entries of the LD matrix.
+
+    :return: None. The LD matrix is loaded in place and cached as a sparse CSR matrix.
+    """
+
+    if dtype is not None:
+        dtype = np.dtype(dtype)
+
+    if self.in_memory:
+        # If the LD matrix is already in memory:
+
+        if (return_symmetric == self.is_symmetric) and not force_reload:
+            # If the requested symmetry is the same as the one already loaded,
+            # and the user asked not to force a reload, then do nothing.
+
+            # If the currently loaded LD matrix has float entries and the user wants
+            # the return type to be another floating point, then just cast and return.
+            # Otherwise, we have to reload the matrix:
+            if np.issubdtype(self._mat.data.dtype, np.floating) and np.issubdtype(dtype, np.floating):
+                self._mat.data = self._mat.data.astype(dtype)
+                return
+            elif self._mat.data.dtype == dtype:
+                return
+
+    # If we are re-loading the matrix, make sure to release the current one:
+    self.release()
+
+    self._mat = self.load_rows(return_symmetric=return_symmetric,
+                               fill_diag=fill_diag,
+                               dtype=dtype)
+
+    # If a mask is set, apply it:
+    if self._mask is not None:
+        self._mat = self._mat[self._mask, :][:, self._mask]
+
+    # Update the flags:
+    self.in_memory = True
+    self.is_symmetric = return_symmetric
+
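A sketch of a typical load/release cycle, assuming `ld_mat` is an initialized `LDMatrix` (the `csr_matrix` accessor is the in-memory matrix used elsewhere in this class):

```python
# Load the full symmetric matrix into memory as float32:
ld_mat.load(return_symmetric=True, fill_diag=True, dtype='float32')

R = ld_mat.csr_matrix   # the in-memory scipy CSR representation

ld_mat.release()        # free the memory once done
```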

#### load_rows(start_row=None, end_row=None, return_symmetric=False, fill_diag=False, keep_shape=True, dtype=None)

A utility function to allow for loading a subset of the LD matrix. By specifying `start_row` and `end_row`, the user can process or inspect small blocks of the LD matrix without loading the whole thing into memory.

TODO: Consider using `low_memory_load` internally to avoid reconstructing the `indices` array.

!!! note
    This method does not perform any filtering on the stored data.
    To access the LD matrix with filtering, use `.load()` or `low_memory_load`.

!!! seealso "See Also"
    * [low_memory_load][magenpy.LDMatrix.LDMatrix.low_memory_load]
    * [load][magenpy.LDMatrix.LDMatrix.load]

Parameters:

| Name | Description | Default |
|------|-------------|---------|
| `start_row` | The start row to load to memory. | `None` |
| `end_row` | The end row (not inclusive) to load to memory. | `None` |
| `return_symmetric` | If True, return a full symmetric representation of the LD matrix. | `False` |
| `fill_diag` | If True, fill the diagonal of the LD matrix with ones. | `False` |
| `keep_shape` | If True, return the LD matrix with the same shape as the original. Here, entries outside the requested `start_row:end_row` region are zeroed out. | `True` |
| `dtype` | The data type for the entries of the LD matrix. | `None` |

Returns: The requested sub-matrix of the LD matrix.

Source code in `magenpy/LDMatrix.py`:
def load_rows(self,
+              start_row=None,
+              end_row=None,
+              return_symmetric=False,
+              fill_diag=False,
+              keep_shape=True,
+              dtype=None):
+    """
+    A utility function to allow for loading a subset of the LD matrix.
+    By specifying `start_row` and `end_row`, the user can process or inspect small
+    blocks of the LD matrix without loading the whole thing into memory.
+
+    TODO: Consider using `low_memory_load` internally to avoid reconstructing the `indices` array.
+
+    !!! note
+        This method does not perform any filtering on the stored data.
+        To access the LD matrix with filtering, use `.load()` or `low_memory_load`.
+
+    !!! seealso "See Also"
+        * [low_memory_load][magenpy.LDMatrix.LDMatrix.low_memory_load]
+        * [load][magenpy.LDMatrix.LDMatrix.load]
+
+    :param start_row: The start row to load to memory
+    :param end_row: The end row (not inclusive) to load to memory
+    :param return_symmetric: If True, return a full symmetric representation of the LD matrix.
+    :param fill_diag: If True, fill the diagonal of the LD matrix with ones.
+    :param keep_shape: If True, return the LD matrix with the same shape as the original. Here,
+    entries that are outside the requested start_row:end_row region will be zeroed out.
+    :param dtype: The data type for the entries of the LD matrix.
+
+    :return: The requested sub-matrix of the LD matrix.
+    """
+
+    # Determine the final data type for the LD matrix entries
+    # and whether we need to perform dequantization or not depending on
+    # the stored data type and the requested data type.
+    if dtype is None:
+        dtype = self.stored_dtype
+        dequantize_data = False
+    else:
+        dtype = np.dtype(dtype)
+        if np.issubdtype(self.stored_dtype, np.integer) and np.issubdtype(dtype, np.floating):
+            dequantize_data = True
+        else:
+            dequantize_data = False
+
+    # Sanity checking + forming the dimensions of the
+    # requested sub-matrix:
+    n_snps = self.stored_n_snps
+
+    start_row = start_row or 0
+    end_row = end_row or n_snps
+
+    # Sanity checking:
+    assert start_row >= 0
+    end_row = min(end_row, n_snps)
+
+    # Load the index pointer from disk:
+    indptr = self._zg['matrix/indptr'][:]
+
+    # Determine the start and end positions in the data matrix
+    # based on the requested start and end rows:
+    data_start = indptr[start_row]
+    data_end = indptr[end_row]
+
+    # If the user is requesting a subset of the matrix, then we need to adjust
+    # the index pointer accordingly:
+    if start_row > 0 or end_row < n_snps:
+        # Zero out all index pointers before `start_row`:
+        indptr = np.clip(indptr - data_start, a_min=0, a_max=None)
+        # Adjust all index pointers after `end_row`:
+        indptr[end_row+1:] = (data_end - data_start)
+
+    # Extract the data for the requested rows:
+    csr_data = self._zg['matrix/data'][data_start:data_end]
+
+    # If we need to de-quantize the data, do it now:
+    if dequantize_data:
+        csr_data = dequantize(csr_data, float_dtype=dtype)
+
+    # Construct a CSR matrix from the loaded data, updated indptr, and indices:
+
+    # Get the indices array:
+    if self.in_memory:
+        # If the matrix (or a version of it) is already loaded,
+        # then set the `in_memory` flag to False before fetching the indices.
+        self.in_memory = False
+        indices = self.indices
+        self.in_memory = True
+    else:
+        indices = self.indices
+
+    mat = csr_matrix(
+        (
+            csr_data,
+            indices[data_start:data_end],
+            indptr
+        ),
+        shape=(n_snps, n_snps),
+        dtype=dtype
+    )
+
+    # Determine the "invalid" value for the purposes of reconstructing
+    # the symmetric matrix:
+    if np.issubdtype(dtype, np.integer):
+        # For integers, we don't use the minimum value during quantization
+        # because we would like to have the zero point at exactly zero. So,
+        # we can use this value as our alternative to `nan`.
+        invalid_value = np.iinfo(dtype).min
+        identity_val = np.iinfo(dtype).max
+    else:
+        invalid_value = np.nan
+        identity_val = 1
+
+    if return_symmetric:
+
+        # First, replace explicit zeros with invalid value (this is a hack to prevent scipy
+        # from eliminating those zeros when making the matrix symmetric):
+        mat.data[mat.data == 0] = invalid_value
+
+        # Add the matrix transpose to make it symmetric:
+        mat = (mat + mat.T).astype(dtype)
+
+        # If the user requested filling the diagonals, do it here:
+        if fill_diag:
+            diag_vals = np.concatenate([np.zeros(start_row, dtype=dtype),
+                                        identity_val*np.ones(end_row - start_row, dtype=dtype),
+                                        np.zeros(n_snps - end_row, dtype=dtype)])
+            mat += diags(diag_vals, dtype=dtype, shape=mat.shape)
+
+        # Replace the invalid values with zeros again:
+        if np.isnan(invalid_value):
+            mat.data[np.isnan(mat.data)] = 0
+        else:
+            mat.data[mat.data == invalid_value] = 0
+
+        return mat
+    elif fill_diag:
+        diag_vals = np.concatenate([np.zeros(start_row, dtype=dtype),
+                                    identity_val*np.ones(end_row - start_row, dtype=dtype),
+                                    np.zeros(n_snps - end_row, dtype=dtype)])
+        mat += diags(diag_vals, dtype=dtype, shape=mat.shape)
+
+    # If the shape remains the same, return the matrix as is.
+    # Otherwise, return the requested sub-matrix:
+    if keep_shape:
+        return mat
+    else:
+        return mat[start_row:end_row, :]
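For instance, a sketch of inspecting a small block without loading the whole matrix (`ld_mat` assumed initialized):

```python
# Load only the first 100 rows, returned as a 100-row sub-matrix:
block = ld_mat.load_rows(start_row=0,
                         end_row=100,
                         keep_shape=False,   # drop the zeroed-out remainder
                         dtype='float32')
print(block.shape)
```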

#### low_memory_load(dtype=None)

A utility method to load the LD matrix in low-memory mode. The method will load the entries of the upper triangular portion of the matrix, perform filtering based on the mask (if set), and return the filtered data and index pointer (`indptr`) arrays.

This is useful for some applications, such as the `low_memory` version of the `viprs` method, because it avoids reconstructing the `indices` array for the CSR matrix, which can potentially be a very long array of large integers.

!!! note
    The method, by construction, does not support loading the full symmetric matrix. If
    that's the goal, use the `.load()` or `.load_rows()` methods.

!!! seealso "See Also"
    * [load_rows][magenpy.LDMatrix.LDMatrix.load_rows]
    * [load][magenpy.LDMatrix.LDMatrix.load]

Parameters:

| Name | Description | Default |
|------|-------------|---------|
| `dtype` | The data type for the entries of the LD matrix. | `None` |

Returns: A tuple of the data and index pointer arrays for the LD matrix.

Source code in `magenpy/LDMatrix.py`:
def low_memory_load(self, dtype=None):
+    """
+    A utility method to load the LD matrix in low-memory mode.
+    The method will load the entries of the upper triangular portion of the matrix,
+    perform filtering based on the mask (if set), and return the filtered data
+    and index pointer (`indptr`) arrays.
+
+    This is useful for some applications, such as the `low_memory` version of
+    the `viprs` method, because it avoids reconstructing the `indices` array for the CSR matrix,
+    which can potentially be a very long array of large integers.
+
+    !!! note
+        The method, by construction, does not support loading the full symmetric matrix. If
+        that's the goal, use the `.load()` or `.load_rows()` methods.
+
+    !!! seealso "See Also"
+        * [load_rows][magenpy.LDMatrix.LDMatrix.load_rows]
+        * [load][magenpy.LDMatrix.LDMatrix.load]
+
+    :param dtype: The data type for the entries of the LD matrix.
+
+    :return: A tuple of the data and index pointer arrays for the LD matrix.
+
+    """
+
+    # Determine the final data type for the LD matrix entries
+    # and whether we need to perform dequantization or not depending on
+    # the stored data type and the requested data type.
+
+    if dtype is None:
+        dtype = self.stored_dtype
+        dequantize_data = False
+    else:
+        dtype = np.dtype(dtype)
+        if np.issubdtype(self.stored_dtype, np.integer) and np.issubdtype(dtype, np.floating):
+            dequantize_data = True
+        else:
+            dequantize_data = False
+
+    # Get the index pointer array:
+    indptr = self._zg['matrix/indptr'][:]
+
+    # Filter the index pointer array based on the mask:
+    if self._mask is not None:
+
+        if np.issubdtype(self._mask.dtype, np.integer):
+            mask = np.zeros(self.stored_n_snps, dtype=np.int8)
+            mask[self._mask] = 1
+        else:
+            mask = self._mask
+
+        from .stats.ld.c_utils import filter_ut_csr_matrix_low_memory
+
+        data_mask, indptr = filter_ut_csr_matrix_low_memory(indptr, mask)
+        # Unfortunately, .vindex is very slow in Zarr right now (~order of magnitude)
+        # So for now, we load the entire data array before performing the mask selection:
+        data = self._zg['matrix/data'][:][data_mask]
+    else:
+        data = self._zg['matrix/data'][:]
+
+    if dequantize_data:
+        return dequantize(data, float_dtype=dtype), indptr
+    else:
+        return data.astype(dtype), indptr
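A sketch of the low-memory access pattern (`ld_mat` assumed initialized; the row layout follows the upper-triangular CSR convention described above):

```python
# Dequantize on the fly and get the upper-triangular data + index pointer:
data, indptr = ld_mat.low_memory_load(dtype='float32')

# Entries of row 10 (columns to the right of the diagonal) live in:
row_10 = data[indptr[10]:indptr[11]]
```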

#### multiply(vec)

Multiply the LD matrix with an input vector `vec`.

!!! seealso "See Also"
    * [dot][magenpy.LDMatrix.LDMatrix.dot]

Parameters:

| Name | Description | Default |
|------|-------------|---------|
| `vec` | The input vector to multiply with the LD matrix. | *required* |

Returns: The product of the LD matrix with the input vector.

Source code in `magenpy/LDMatrix.py`:
def multiply(self, vec):
+    """
+    Multiply the LD matrix with an input vector `vec`.
+
+    !!! seealso "See Also"
+        * [dot][magenpy.LDMatrix.LDMatrix.dot]
+
+    :param vec: The input vector to multiply with the LD matrix.
+    :return: The product of the LD matrix with the input vector.
+    """
+    return self.csr_matrix.dot(vec)
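A one-line sketch of a matrix-vector product (`ld_mat` assumed initialized and loaded):

```python
import numpy as np

v = np.random.normal(size=len(ld_mat))  # random vector matching the matrix dimension
Rv = ld_mat.multiply(v)                 # R @ v via the in-memory CSR matrix
```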

#### release()

Release the LD data from memory.

Source code in `magenpy/LDMatrix.py`:
def release(self):
+    """
+    Release the LD data from memory.
+    """
+    self._mat = None
+    self.in_memory = False
+    self.is_symmetric = False
+    self.index = 0

#### set_mask(mask)

Set the mask (a boolean array) to hide/remove some SNPs from the LD matrix.

Parameters:

| Name | Description | Default |
|------|-------------|---------|
| `mask` | An array of indices or boolean mask for SNPs to retain. | *required* |

Source code in `magenpy/LDMatrix.py`:
def set_mask(self, mask):
+    """
+    Set the mask (a boolean array) to hide/remove some SNPs from the LD matrix.
+    :param mask: An array of indices or boolean mask for SNPs to retain.
+    """
+
+    # If the mask is equivalent to the current mask, return:
+    if np.array_equal(mask, self._mask):
+        return
+
+    # If the mask is boolean, convert to indices (should we?):
+    if mask.dtype == bool:
+        self._mask = np.where(mask)[0]
+    else:
+        self._mask = mask
+
+    # If the data is already in memory, reload:
+    if self.in_memory:
+        self.load(force_reload=True,
+                  return_symmetric=self.is_symmetric,
+                  fill_diag=self.is_symmetric)
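A sketch of masking variants by a metadata-derived filter; it assumes `ld_mat` is initialized and has the `maf` metadata field set:

```python
# Keep only variants with minor allele frequency above 1%:
maf = ld_mat.get_metadata('maf')
ld_mat.set_mask(maf > 0.01)   # boolean masks are converted to index arrays internally
```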

#### set_metadata(key, value, overwrite=False)

Set the metadata field associated with variants in the LD matrix.

Parameters:

| Name | Description | Default |
|------|-------------|---------|
| `key` | The key for the metadata item. | *required* |
| `value` | The value for the metadata item (an array with the same length as the number of variants). | *required* |
| `overwrite` | If True, overwrite the metadata item if it already exists. | `False` |

Source code in `magenpy/LDMatrix.py`:
def set_metadata(self, key, value, overwrite=False):
+    """
+    Set the metadata field associated with variants in the LD matrix.
+    :param key: The key for the metadata item.
+    :param value: The value for the metadata item (an array with the same length as the number of variants).
+    :param overwrite: If True, overwrite the metadata item if it already exists.
+    """
+
+    if 'metadata' not in list(self._zg.group_keys()):
+        meta = self._zg.create_group('metadata')
+    else:
+        meta = self._zg['metadata']
+
+    value = np.array(value)
+
+    if np.issubdtype(value.dtype, np.floating):
+        dtype = np.float32
+    elif np.issubdtype(value.dtype, np.integer):
+        dtype = np.int32
+    else:
+        dtype = str
+
+    meta.array(key, value, overwrite=overwrite, dtype=dtype, compressor=self.compressor)
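A short sketch; the `ld_scores` array is hypothetical and must match the number of variants:

```python
# Attach per-variant LD scores to the store, replacing any existing entry:
ld_mat.set_metadata('ldscore', ld_scores, overwrite=True)
```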

#### set_store_attr(attr, value)

Set the attribute `attr` associated with the LD matrix. This is used to set high-level information, such as information about the sample from which the matrix was computed, the LD estimator used, its properties, etc.

Parameters:

| Name | Description | Default |
|------|-------------|---------|
| `attr` | The attribute name. | *required* |
| `value` | The value for the attribute. | *required* |

Source code in `magenpy/LDMatrix.py`:
def set_store_attr(self, attr, value):
+    """
+    Set the attribute `attr` associated with the LD matrix. This is used
+    to set high-level information, such as information about the sample from which
+    the matrix was computed, the LD estimator used, its properties, etc.
+
+    :param attr: The attribute name.
+    :param value: The value for the attribute.
+    """
+
+    self._zg.attrs[attr] = value
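Setting and reading back a store attribute, as a minimal sketch (`'Sample size'` is one of the attribute names used elsewhere in this class):

```python
ld_mat.set_store_attr('Sample size', 50_000)
n = ld_mat.get_store_attr('Sample size')
```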

#### to_snp_table(col_subset=None)

Parameters:

| Name | Description | Default |
|------|-------------|---------|
| `col_subset` | The subset of columns to add to the table. If None, all available columns are returned. | `None` |

Returns: A `pandas` dataframe of the SNP attributes and metadata for variants included in the LD matrix.

Source code in `magenpy/LDMatrix.py`:
def to_snp_table(self, col_subset=None):
+    """
+    :param col_subset: The subset of columns to add to the table. If None, it returns
+    all available columns.
+
+    :return: A `pandas` dataframe of the SNP attributes and metadata for variants
+    included in the LD matrix.
+    """
+
+    col_subset = col_subset or ['CHR', 'SNP', 'POS', 'A1', 'A2', 'MAF', 'LDScore']
+
+    table = pd.DataFrame({'SNP': self.snps})
+
+    for col in col_subset:
+        if col == 'CHR':
+            table['CHR'] = self.chromosome
+        if col == 'POS':
+            table['POS'] = self.bp_position
+        if col == 'cM':
+            table['cM'] = self.cm_position
+        if col == 'A1':
+            table['A1'] = self.a1
+        if col == 'A2':
+            table['A2'] = self.a2
+        if col == 'MAF':
+            table['MAF'] = self.maf
+        if col == 'LDScore':
+            table['LDScore'] = self.ld_score
+        if col == 'WindowSize':
+            table['WindowSize'] = self.window_size
+
+    return table[list(col_subset)]
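For example (assuming `ld_mat` is initialized and the relevant metadata fields are set):

```python
# Export a table of variant attributes for the matrix:
snp_df = ld_mat.to_snp_table(col_subset=['CHR', 'SNP', 'POS', 'MAF'])
print(snp_df.head())
```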

#### update_rows_inplace(new_csr, start_row=None, end_row=None)

A utility function to perform partial updates to a subset of rows in the LD matrix. The function takes a new CSR matrix and, optionally, a start and end row delimiting the chunk of the LD matrix to update with the `new_csr`.

!!! note
    The current implementation assumes that the update does not change the sparsity
    structure of the original matrix. Updating the matrix with a new sparsity structure
    is a harder problem that we will try to tackle later on.

!!! note
    The current implementation assumes `new_csr` is upper triangular.

Parameters:

| Name | Description | Default |
|------|-------------|---------|
| `new_csr` | A sparse CSR matrix (`scipy.sparse.csr_matrix`) whose column dimension matches the column dimension of the LD matrix. | *required* |
| `start_row` | The start row for the chunk to update. | `None` |
| `end_row` | The end row for the chunk to update. | `None` |

Raises: `AssertionError` if the column dimension of `new_csr` does not match the column dimension of the LD matrix.

Source code in `magenpy/LDMatrix.py`:
def update_rows_inplace(self, new_csr, start_row=None, end_row=None):
+    """
+    A utility function to perform partial updates to a subset of rows in the
+    LD matrix. The function takes a new CSR matrix and, optionally, a start
+    and end row delimiting the chunk of the LD matrix to update with the `new_csr`.
+
+    !!! note
+        Current implementation assumes that the update does not change the sparsity
+        structure of the original matrix. Updating the matrix with new sparsity structure
+        is a harder problem that we will try to tackle later on.
+
+    !!! note
+        Current implementation assumes `new_csr` is upper triangular.
+
+    :param new_csr: A sparse CSR matrix (`scipy.sparse.csr_matrix`) where the column dimension
+    matches the column dimension of the LD matrix.
+    :param start_row: The start row for the chunk to update.
+    :param end_row: The end row for the chunk to update.
+
+    :raises AssertionError: if the column dimension of `new_csr` does not match the column dimension of the LD matrix.
+    """
+
+    assert new_csr.shape[1] == self.stored_n_snps
+
+    start_row = start_row or 0
+    end_row = end_row or self.stored_n_snps
+
+    # Sanity checking:
+    assert start_row >= 0
+    assert end_row <= self.stored_n_snps
+
+    indptr = self._zg['matrix/indptr'][:]
+
+    data_start = indptr[start_row]
+    data_end = indptr[end_row]
+
+    # TODO: Check that this covers most cases and would not result in unexpected behavior
+    if np.issubdtype(self.stored_dtype, np.integer) and np.issubdtype(new_csr.dtype, np.floating):
+        self._zg['matrix/data'][data_start:data_end] = quantize(new_csr.data, int_dtype=self.stored_dtype)
+    else:
+        self._zg['matrix/data'][data_start:data_end] = new_csr.data.astype(self.stored_dtype)

#### validate_ld_matrix()

Checks that the `LDMatrix` object has correct structure and checks its contents for validity.

Specifically, we check that:

* The dimensions of the matrix and its associated attributes are matching.
* The masking is working properly.

Returns: True if the matrix has the correct structure.

Raises: `ValueError` if the matrix is not valid.

Source code in `magenpy/LDMatrix.py`:
def validate_ld_matrix(self):
+    """
+    Checks that the `LDMatrix` object has correct structure and
+    checks its contents for validity.
+
+    Specifically, we check that:
+    * The dimensions of the matrix and its associated attributes are matching.
+    * The masking is working properly.
+
+    :return: True if the matrix has the correct structure.
+    :raises ValueError: if the matrix is not valid.
+    """
+
+    class_attrs = ['snps', 'a1', 'a2', 'maf', 'bp_position', 'cm_position', 'ld_score']
+
+    for attr in class_attrs:
+        attribute = getattr(self, attr)
+        if attribute is None:
+            continue
+        if len(attribute) != len(self):
+            raise ValueError(f"Invalid LD Matrix: Dimensions for attribute {attr} are not aligned!")
+
+    # TODO: Add other sanity checks here?
+
+    return True
---

# SampleTable

Bases: `object`

A class to represent sample (individual) information and attributes in the context of a genotype matrix. The sample table is a wrapper around a `pandas.DataFrame` object that contains the sample information. The table provides methods to read and write sample information from/to disk, filter samples, perform checks/validation, and extract specific columns from the table.

Attributes:

| Name | Type | Description |
|------|------|-------------|
| `table` | `Union[DataFrame, None]` | The sample table as a pandas DataFrame. |
| `_phenotype_likelihood` | `Union[str, None]` | The likelihood of the phenotype values (if present). |
| `_covariate_cols` | | The names or IDs of covariates that are present in the sample table. |

Source code in `magenpy/SampleTable.py`:
class SampleTable(object):
+    """
+    A class to represent sample (individual) information and attributes in
+    the context of a genotype matrix. The sample table is a wrapper around
+    a `pandas.DataFrame` object that contains the sample information. The
+    table provides methods to read and write sample information from/to
+    disk, filter samples, perform checks/validation, and extract specific columns
+    from the table.
+
+    :ivar table: The sample table as a pandas `DataFrame`.
+    :ivar _phenotype_likelihood: The likelihood of the phenotype values (if present).
+    :ivar _covariate_cols: The names or IDs of covariates that are present in the sample table.
+
+    """
+
+    def __init__(self,
+                 table: Union[pd.DataFrame, None] = None,
+                 phenotype_likelihood: Union[str, None] = None):
+        """
+        Initialize the sample table object.
+        :param table: A pandas DataFrame with the sample information.
+        :param phenotype_likelihood: The likelihood of the phenotype values.
+        """
+
+        self.table: Union[pd.DataFrame, None] = table
+
+        if self.table is not None and 'original_index' not in self.table.columns:
+            self.table['original_index'] = np.arange(len(self.table))
+
+        assert phenotype_likelihood in (None, 'binomial', 'gaussian', 'infer')
+
+        self._phenotype_likelihood: Union[str, None] = phenotype_likelihood
+        self._covariate_cols = None
+
+        if self.table is not None:
+            self.post_check_phenotype()
+
+    @property
+    def shape(self):
+        """
+        :return: The shape of the sample table (mainly sample size) as a tuple (n,).
+        """
+        return (self.n,)
+
+    @property
+    def n(self):
+        """
+        !!! seealso "See Also"
+            * [sample_size][magenpy.SampleTable.SampleTable.sample_size]
+
+        :return: The sample size (number of individuals) in the sample table.
+        """
+        return len(self.table)
+
+    @property
+    def sample_size(self):
+        """
+        !!! seealso "See Also"
+            * [n][magenpy.SampleTable.SampleTable.n]
+
+        :return: The sample size (number of individuals) in the sample table.
+        """
+        return self.n
+
+    @property
+    def iid(self):
+        """
+        :return: The individual ID of each individual in the sample table.
+        """
+        if self.table is not None:
+            return self.table['IID'].values
+
+    @property
+    def fid(self):
+        """
+        :return: The family ID of each individual in the sample table.
+        """
+        if self.table is not None:
+            return self.table['FID'].values
+
+    @property
+    def phenotype(self):
+        """
+        :return: The phenotype column from the sample table.
+        :raises KeyError: If the phenotype is not set.
+        """
+        if self.table is not None:
+            try:
+                return self.table['phenotype'].values
+            except KeyError:
+                raise KeyError("The phenotype is not set!")
+
+    @property
+    def original_index(self):
+        """
+        :return: The original index of each individual in the sample table (before applying any filters).
+        """
+        if self.table is not None:
+            return self.table['original_index'].values
+
+    @property
+    def covariates(self):
+        """
+        :return: The column names for the covariates stored in the sample table.
+        """
+        return self._covariate_cols
+
+    @property
+    def phenotype_likelihood(self):
+        """
+        :return: The phenotype likelihood family.
+        """
+        return self._phenotype_likelihood
+
+    @classmethod
+    def from_fam_file(cls, fam_file):
+        """
+        Initialize a sample table object from a path to PLINK FAM file.
+        :param fam_file: The path to the FAM file.
+
+        :return: A `SampleTable` object.
+        """
+
+        from .parsers.plink_parsers import parse_fam_file
+
+        s_tab = parse_fam_file(fam_file)
+        return cls(table=s_tab)
+
+    @classmethod
+    def from_phenotype_file(cls, phenotype_file, filter_na=True, **read_csv_kwargs):
+        """
+        Initialize a sample table from a phenotype file.
+        :param phenotype_file: The path to the phenotype file.
+        :param filter_na: Filter samples with missing phenotype values (Default: True).
+        :param read_csv_kwargs: keyword arguments to pass to the `read_csv` function of `pandas`.
+
+        :return: A `SampleTable` object.
+        """
+        s_tab = cls()
+        s_tab.read_phenotype_file(phenotype_file, filter_na, **read_csv_kwargs)
+        return s_tab
+
+    @classmethod
+    def from_covariate_file(cls, covar_file, **read_csv_kwargs):
+        """
+        Initialize a sample table from a file of covariates.
+        :param covar_file: The path to the covariates file.
+        :param read_csv_kwargs: keyword arguments to pass to the `read_csv` function of `pandas`.
+
+        :return: A `SampleTable` object.
+        """
+        s_tab = cls()
+        s_tab.read_covariates_file(covar_file, **read_csv_kwargs)
+        return s_tab
+
+    def read_phenotype_file(self, phenotype_file, drop_na=True, **read_csv_kwargs):
+        """
+        Read the phenotype file from disk. The expected format is Family ID (`FID`),
+        Individual ID (`IID`) and the phenotype column `phenotype`. You may adjust
+        the parsing configurations with keyword arguments that will be passed to `pandas.read_csv`.
+
+        :param phenotype_file: The path to the phenotype file.
+        :param drop_na: Drop samples whose phenotype value is missing (Default: True).
+        :param read_csv_kwargs: keyword arguments to pass to the `read_csv` function of `pandas`.
+        """
+
+        if 'sep' not in read_csv_kwargs and 'delimiter' not in read_csv_kwargs:
+            read_csv_kwargs['sep'] = r'\s+'
+
+        if 'na_values' not in read_csv_kwargs:
+            read_csv_kwargs['na_values'] = {'phenotype': [-9.]}
+
+        if 'dtype' not in read_csv_kwargs:
+            read_csv_kwargs['dtype'] = {'phenotype': float}
+
+        pheno_table = pd.read_csv(phenotype_file, **read_csv_kwargs)
+        pheno_table.columns = ['FID', 'IID', 'phenotype']
+
+        if self.table is not None:
+            pheno_table['FID'] = pheno_table['FID'].astype(type(self.fid[0]))
+            pheno_table['IID'] = pheno_table['IID'].astype(type(self.iid[0]))
+
+            # Drop the phenotype column if it already exists
+            # (note: drop() is not in-place, so assign the result back):
+            if 'phenotype' in self.table.columns:
+                self.table = self.table.drop(columns=['phenotype'])
+
+            self.table = self.table.merge(pheno_table, on=['FID', 'IID'])
+        else:
+            self.table = pheno_table
+
+        if self.table['phenotype'].isnull().all():
+            self.table.drop('phenotype', axis=1, inplace=True)
+        elif drop_na:
+            # Maybe using converters in the read_csv above?
+            self.table = self.table.dropna(subset=['phenotype'])
+
+        self.post_check_phenotype()
+
+    def read_covariates_file(self, covar_file, **read_csv_kwargs):
+        """
+        Read the covariates file from the provided path. The expected format is Family ID (`FID`),
+        Individual ID (`IID`) and the remaining columns are assumed to be covariates. You may adjust
+        the parsing configurations with keyword arguments that will be passed to `pandas.read_csv`.
+
+        :param covar_file: The path to the covariates file.
+        :param read_csv_kwargs: keyword arguments to pass to the `read_csv` function of `pandas`.
+        """
+
+        if 'sep' not in read_csv_kwargs and 'delimiter' not in read_csv_kwargs:
+            read_csv_kwargs['sep'] = r'\s+'
+
+        covar_table = pd.read_csv(covar_file, **read_csv_kwargs)
+        self._covariate_cols = covar_table.columns[2:]
+        covar_table.columns = ['FID', 'IID'] + list(self._covariate_cols)
+
+        if self.table is not None:
+            covar_table['FID'] = covar_table['FID'].astype(type(self.fid[0]))
+            covar_table['IID'] = covar_table['IID'].astype(type(self.iid[0]))
+
+            self.table = self.table.merge(covar_table)
+        else:
+            self.table = covar_table
+
+    def post_check_phenotype(self):
+        """
+        Apply some simple heuristics to check the phenotype values
+        provided by the user and infer the phenotype likelihood (if feasible).
+
+        :raises ValueError: If the phenotype values could not be matched with the
+        inferred phenotype likelihood.
+        """
+
+        if 'phenotype' in self.table.columns:
+
+            unique_vals = self.table['phenotype'].unique()
+
+            if self.table['phenotype'].isnull().all():
+                self.table.drop('phenotype', axis=1, inplace=True)
+            elif self._phenotype_likelihood != 'gaussian':
+
+                if len(unique_vals) > 2:
+                    self._phenotype_likelihood = 'gaussian'
+                    return
+
+                unique_vals = sorted(unique_vals)
+
+                if unique_vals == [1, 2]:
+                    # Plink coding for case/control
+                    self.table['phenotype'] -= 1
+                    self._phenotype_likelihood = 'binomial'
+                elif unique_vals == [0, 1]:
+                    self._phenotype_likelihood = 'binomial'
+                else:
+                    raise ValueError(f"Unknown values for binary traits: {unique_vals}. "
+                                     f"The software only supports 0/1 or 1/2 coding for cases and controls.")
+
+    def filter_samples(self, keep_samples=None, keep_file=None):
+        """
+        Filter samples from the samples table. User must specify
+        either a list of samples to keep or the path to a file
+        with the list of samples to keep.
+
+        :param keep_samples: A list (or array) of sample IDs to keep.
+        :param keep_file: The path to a file with the list of samples to keep.
+        """
+
+        assert keep_samples is not None or keep_file is not None
+
+        if keep_samples is None:
+            from .parsers.misc_parsers import read_sample_filter_file
+            keep_samples = read_sample_filter_file(keep_file)
+
+        self.table = self.table.merge(pd.DataFrame({'IID': keep_samples},
+                                                   dtype=type(self.iid[0])))
+
+    def to_table(self, col_subset=None):
+        """
+        Get the sample table as a pandas DataFrame.
+
+        :param col_subset: A subset of the columns to include in the table.
+        :return: A pandas DataFrame with the sample information.
+        """
+        if col_subset is not None:
+            return self.table[list(col_subset)]
+        else:
+            return self.table
+
+    def get_individual_table(self):
+        """
+        :return: A table of individual IDs (FID, IID) present in the sample table.
+        """
+        return self.to_table(col_subset=['FID', 'IID'])
+
+    def get_phenotype_table(self):
+        """
+        :return: A table of individual IDs and phenotype values (FID IID phenotype) in the sample table.
+        """
+        try:
+            return self.to_table(col_subset=['FID', 'IID', 'phenotype'])
+        except KeyError:
+            raise KeyError("The phenotype is not set!")
+
+    def get_covariates_table(self, covar_subset=None):
+        """
+        Get a table of covariates associated with each individual in the
+        sample table. The table will be formatted as (FID, IID, covar1, covar2, ...).
+
+        :param covar_subset: A subset of the covariate names or IDs to include in the table.
+        :return: A pandas DataFrame with the covariate information.
+        """
+        assert self._covariate_cols is not None
+
+        if covar_subset is None:
+            covar = self._covariate_cols
+        else:
+            covar = list(set(self._covariate_cols).intersection(set(covar_subset)))
+
+        assert len(covar) >= 1
+
+        return self.to_table(col_subset=['FID', 'IID'] + covar)
+
+    def get_covariates(self, covar_subset=None):
+        """
+        Get the covariates associated with each individual in the sample table as a matrix.
+        :param covar_subset: A subset of the covariate names or IDs to include in the matrix.
+
+        :return: A numpy array with the covariate values.
+        """
+        return self.get_covariates_table(covar_subset=covar_subset).iloc[:, 2:].values
+
+    def set_phenotype(self, phenotype, phenotype_likelihood=None):
+        """
+        Update the phenotype in the sample table using the provided values.
+        :param phenotype: The new phenotype values, represented by a numpy array or Iterable.
+        :param phenotype_likelihood: The likelihood of the phenotype values.
+        """
+
+        self.table['phenotype'] = phenotype
+
+        if phenotype_likelihood:
+            self._phenotype_likelihood = phenotype_likelihood
+        else:
+            self.post_check_phenotype()
+
+    def to_file(self, output_file, col_subset=None, **to_csv_kwargs):
+        """
+        Write the contents of the sample table to file.
+        :param output_file: The path to the file where to write the sample table.
+        :param col_subset: A subset of the columns to write to file.
+        :param to_csv_kwargs: keyword arguments to pass to the `to_csv` function of `pandas`.
+        """
+
+        assert self.table is not None
+
+        if 'sep' not in to_csv_kwargs and 'delimiter' not in to_csv_kwargs:
+            to_csv_kwargs['sep'] = '\t'
+
+        if 'index' not in to_csv_kwargs:
+            to_csv_kwargs['index'] = False
+
+        if col_subset is not None:
+            table = self.table[col_subset]
+        else:
+            table = self.table
+
+        table.to_csv(output_file, **to_csv_kwargs)
+
+    def __len__(self):
+        return self.n
+
+    def __eq__(self, other):
+        return np.array_equal(self.iid, other.iid)
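To tie the pieces together, here is a hedged end-to-end sketch. The file paths are hypothetical, and it assumes `SampleTable` is exposed at the top level of the `magenpy` namespace:

```python
import magenpy as mgp

# Initialize from a PLINK FAM file, then attach phenotypes and covariates:
s_tab = mgp.SampleTable.from_fam_file("data/genotypes/chr_22.fam")
s_tab.read_phenotype_file("data/phenotype.txt")    # columns: FID IID phenotype
s_tab.read_covariates_file("data/covariates.txt")  # columns: FID IID covar1 ...

print(s_tab.n, s_tab.phenotype_likelihood)
```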

#### covariates (property)

Returns: The column names for the covariates stored in the sample table.
#### fid (property)

Returns: The family ID of each individual in the sample table.
#### iid (property)

Returns: The individual ID of each individual in the sample table.
#### n (property)

!!! seealso "See Also"
    * [sample_size][magenpy.SampleTable.SampleTable.sample_size]

Returns: The sample size (number of individuals) in the sample table.
#### original_index (property)

Returns: The original index of each individual in the sample table (before applying any filters).
#### phenotype (property)

Returns: The phenotype column from the sample table.

Raises: `KeyError` if the phenotype is not set.
#### phenotype_likelihood (property)

Returns: The phenotype likelihood family.
#### sample_size (property)

!!! seealso "See Also"
    * [n][magenpy.SampleTable.SampleTable.n]

Returns: The sample size (number of individuals) in the sample table.
+ shape + + + property + + +

+ + +
+ + + + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

The shape of the sample table (mainly sample size) as a tuple (n,).

+
+
+
+ +
+ + + + +
+ + + +

+__init__(table=None, phenotype_likelihood=None)
+
+Initialize the sample table object.
+
+Parameters:
+    table (Union[DataFrame, None], default None): A pandas DataFrame with the sample information.
+    phenotype_likelihood (Union[str, None], default None): The likelihood of the phenotype values.
+
+Source code in magenpy/SampleTable.py:
def __init__(self,
+             table: Union[pd.DataFrame, None] = None,
+             phenotype_likelihood: Union[str, None] = None):
+    """
+    Initialize the sample table object.
+    :param table: A pandas DataFrame with the sample information.
+    :param phenotype_likelihood: The likelihood of the phenotype values.
+    """
+
+    self.table: Union[pd.DataFrame, None] = table
+
+    if self.table is not None and 'original_index' not in self.table.columns:
+        self.table['original_index'] = np.arange(len(self.table))
+
+    assert phenotype_likelihood in (None, 'binomial', 'gaussian', 'infer')
+
+    self._phenotype_likelihood: Union[str, None] = phenotype_likelihood
+    self._covariate_cols = None
+
+    if self.table is not None:
+        self.post_check_phenotype()
+
+filter_samples(keep_samples=None, keep_file=None)
+
+Filter samples from the samples table. The user must specify either a list of samples to keep or the path to a file with the list of samples to keep.
+
+Parameters:
+    keep_samples (default None): A list (or array) of sample IDs to keep.
+    keep_file (default None): The path to a file with the list of samples to keep.
+
+Source code in magenpy/SampleTable.py:
def filter_samples(self, keep_samples=None, keep_file=None):
+    """
+    Filter samples from the samples table. User must specify
+    either a list of samples to keep or the path to a file
+    with the list of samples to keep.
+
+    :param keep_samples: A list (or array) of sample IDs to keep.
+    :param keep_file: The path to a file with the list of samples to keep.
+    """
+
+    assert keep_samples is not None or keep_file is not None
+
+    if keep_samples is None:
+        from .parsers.misc_parsers import read_sample_filter_file
+        keep_samples = read_sample_filter_file(keep_file)
+
+    self.table = self.table.merge(pd.DataFrame({'IID': keep_samples},
+                                               dtype=type(self.iid[0])))
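+
+Continuing the sketch above (the IDs are illustrative), keeping two individuals by IID:
+
+s_tab.filter_samples(keep_samples=['I1', 'I3'])
+print(len(s_tab))  # 2 individuals remain after the inner merge on IID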
+
+from_covariate_file(covar_file, **read_csv_kwargs) (classmethod)
+
+Initialize a sample table from a file of covariates.
+
+Parameters:
+    covar_file (required): The path to the covariates file.
+    read_csv_kwargs: Keyword arguments to pass to the `read_csv` function of `pandas`.
+
+Returns: A SampleTable object.
+
+Source code in magenpy/SampleTable.py:
@classmethod
+def from_covariate_file(cls, covar_file, **read_csv_kwargs):
+    """
+    Initialize a sample table from a file of covariates.
+    :param covar_file: The path to the covariates file.
+    :param read_csv_kwargs: keyword arguments to pass to the `read_csv` function of `pandas`.
+
+    :return: A `SampleTable` object.
+    """
+    s_tab = cls()
+    s_tab.read_covariates_file(covar_file, **read_csv_kwargs)
+    return s_tab
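+
+A hypothetical call (the path is illustrative); the expected file layout is described under read_covariates_file below:
+
+covar_tab = SampleTable.from_covariate_file('covariates.txt')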
+
+from_fam_file(fam_file) (classmethod)
+
+Initialize a sample table object from a path to a PLINK FAM file.
+
+Parameters:
+    fam_file (required): The path to the FAM file.
+
+Returns: A SampleTable object.
+
+Source code in magenpy/SampleTable.py:
@classmethod
+def from_fam_file(cls, fam_file):
+    """
+    Initialize a sample table object from a path to PLINK FAM file.
+    :param fam_file: The path to the FAM file.
+
+    :return: A `SampleTable` object.
+    """
+
+    from .parsers.plink_parsers import parse_fam_file
+
+    s_tab = parse_fam_file(fam_file)
+    return cls(table=s_tab)
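+
+For instance (a sketch with an illustrative path), starting from the FAM file that accompanies a PLINK BED/BIM/FAM fileset:
+
+s_tab = SampleTable.from_fam_file('genotypes.fam')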
+
+from_phenotype_file(phenotype_file, filter_na=True, **read_csv_kwargs) (classmethod)
+
+Initialize a sample table from a phenotype file.
+
+Parameters:
+    phenotype_file (required): The path to the phenotype file.
+    filter_na (default True): Filter samples with missing phenotype values.
+    read_csv_kwargs: Keyword arguments to pass to the `read_csv` function of `pandas`.
+
+Returns: A SampleTable object.
+
+Source code in magenpy/SampleTable.py:
@classmethod
+def from_phenotype_file(cls, phenotype_file, filter_na=True, **read_csv_kwargs):
+    """
+    Initialize a sample table from a phenotype file.
+    :param phenotype_file: The path to the phenotype file.
+    :param filter_na: Filter samples with missing phenotype values (Default: True).
+    :param read_csv_kwargs: keyword arguments to pass to the `read_csv` function of `pandas`.
+
+    :return: A `SampleTable` object.
+    """
+    s_tab = cls()
+    s_tab.read_phenotype_file(phenotype_file, filter_na, **read_csv_kwargs)
+    return s_tab
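+
+A hypothetical example (illustrative path), where rows with missing phenotype values are dropped by default:
+
+s_tab = SampleTable.from_phenotype_file('phenotypes.txt', filter_na=True)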
+
+get_covariates(covar_subset=None)
+
+Get the covariates associated with each individual in the sample table as a matrix.
+
+Parameters:
+    covar_subset (default None): A subset of the covariate names or IDs to include in the matrix.
+
+Returns: A numpy array with the covariate values.
+
+Source code in magenpy/SampleTable.py:
def get_covariates(self, covar_subset=None):
+    """
+    Get the covariates associated with each individual in the sample table as a matrix.
+    :param covar_subset: A subset of the covariate names or IDs to include in the matrix.
+
+    :return: A numpy array with the covariate values.
+    """
+    return self.get_covariates_table(covar_subset=covar_subset).iloc[:, 2:].values
+
+get_covariates_table(covar_subset=None)
+
+Get a table of covariates associated with each individual in the sample table. The table will be formatted as (FID, IID, covar1, covar2, ...).
+
+Parameters:
+    covar_subset (default None): A subset of the covariate names or IDs to include in the table.
+
+Returns: A pandas DataFrame with the covariate information.
+
+Source code in magenpy/SampleTable.py:
def get_covariates_table(self, covar_subset=None):
+    """
+    Get a table of covariates associated with each individual in the
+    sample table. The table will be formatted as (FID, IID, covar1, covar2, ...).
+
+    :param covar_subset: A subset of the covariate names or IDs to include in the table.
+    :return: A pandas DataFrame with the covariate information.
+    """
+    assert self._covariate_cols is not None
+
+    if covar_subset is None:
+        covar = self._covariate_cols
+    else:
+        covar = list(set(self._covariate_cols).intersection(set(covar_subset)))
+
+    assert len(covar) >= 1
+
+    return self.to_table(col_subset=['FID', 'IID'] + covar)
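+
+As a sketch (assuming covariates named 'Age' and 'Sex' were previously loaded via read_covariates_file): requesting a subset returns a (FID, IID, ...) table, while get_covariates strips the two ID columns and returns the numeric matrix:
+
+age_sex_tab = s_tab.get_covariates_table(covar_subset=['Age', 'Sex'])
+covar_matrix = s_tab.get_covariates(covar_subset=['Age', 'Sex'])  # numpy array of shape (n, 2)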
+
+get_individual_table()
+
+Returns: A table of individual IDs (FID, IID) present in the sample table.
+
+Source code in magenpy/SampleTable.py:
def get_individual_table(self):
+    """
+    :return: A table of individual IDs (FID, IID) present in the sample table.
+    """
+    return self.to_table(col_subset=['FID', 'IID'])
+
+get_phenotype_table()
+
+Returns: A table of individual IDs and phenotype values (FID, IID, phenotype) in the sample table.
+
+Source code in magenpy/SampleTable.py:
def get_phenotype_table(self):
+    """
+    :return: A table of individual IDs and phenotype values (FID IID phenotype) in the sample table.
+    """
+    try:
+        return self.to_table(col_subset=['FID', 'IID', 'phenotype'])
+    except KeyError:
+        raise KeyError("The phenotype is not set!")
+
+post_check_phenotype()
+
+Apply some simple heuristics to check the phenotype values provided by the user and infer the phenotype likelihood (if feasible).
+
+Raises: ValueError if the phenotype values could not be matched with the inferred phenotype likelihood.
+
+Source code in magenpy/SampleTable.py:
def post_check_phenotype(self):
+    """
+    Apply some simple heuristics to check the phenotype values
+    provided by the user and infer the phenotype likelihood (if feasible).
+
+    :raises ValueError: If the phenotype values could not be matched with the
+    inferred phenotype likelihood.
+    """
+
+    if 'phenotype' in self.table.columns:
+
+        unique_vals = self.table['phenotype'].unique()
+
+        if self.table['phenotype'].isnull().all():
+            self.table.drop('phenotype', axis=1, inplace=True)
+        elif self._phenotype_likelihood != 'gaussian':
+
+            if len(unique_vals) > 2:
+                self._phenotype_likelihood = 'gaussian'
+                return
+
+            unique_vals = sorted(unique_vals)
+
+            if unique_vals == [1, 2]:
+                # Plink coding for case/control
+                self.table['phenotype'] -= 1
+                self._phenotype_likelihood = 'binomial'
+            elif unique_vals == [0, 1]:
+                self._phenotype_likelihood = 'binomial'
+            else:
+                raise ValueError(f"Unknown values for binary traits: {unique_vals}. "
+                                 f"The software only supports 0/1 or 1/2 coding for cases and controls.")
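+
+To illustrate the heuristics above (a sketch with made-up data): a phenotype coded 1/2, as in PLINK case/control files, is recoded to 0/1 and the likelihood is set to binomial:
+
+df = pd.DataFrame({'FID': ['F1', 'F2'], 'IID': ['I1', 'I2'], 'phenotype': [1, 2]})
+tab = SampleTable(table=df)               # __init__ calls post_check_phenotype()
+print(tab.table['phenotype'].tolist())    # [0, 1] after the PLINK 1/2 -> 0/1 recoding
+print(tab.phenotype_likelihood)           # 'binomial'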
+
+read_covariates_file(covar_file, **read_csv_kwargs)
+
+Read the covariates file from the provided path. The expected format is Family ID (FID), Individual ID (IID), and the remaining columns are assumed to be covariates. You may adjust the parsing configuration with keyword arguments that will be passed to `pandas.read_csv`.
+
+Parameters:
+    covar_file (required): The path to the covariates file.
+    read_csv_kwargs: Keyword arguments to pass to the `read_csv` function of `pandas`.
+
+Source code in magenpy/SampleTable.py:
def read_covariates_file(self, covar_file, **read_csv_kwargs):
+    """
+    Read the covariates file from the provided path. The expected format is Family ID (`FID`),
+    Individual ID (`IID`) and the remaining columns are assumed to be covariates. You may adjust
+    the parsing configurations with keyword arguments that will be passed to `pandas.read_csv`.
+
+    :param covar_file: The path to the covariates file.
+    :param read_csv_kwargs: keyword arguments to pass to the `read_csv` function of `pandas`.
+    """
+
+    if 'sep' not in read_csv_kwargs and 'delimiter' not in read_csv_kwargs:
+        read_csv_kwargs['sep'] = r'\s+'
+
+    covar_table = pd.read_csv(covar_file, **read_csv_kwargs)
+    self._covariate_cols = covar_table.columns[2:]
+    covar_table.columns = ['FID', 'IID'] + list(self._covariate_cols)
+
+    if self.table is not None:
+        covar_table['FID'] = covar_table['FID'].astype(type(self.fid[0]))
+        covar_table['IID'] = covar_table['IID'].astype(type(self.iid[0]))
+
+        self.table = self.table.merge(covar_table)
+    else:
+        self.table = covar_table
+
+read_phenotype_file(phenotype_file, drop_na=True, **read_csv_kwargs)
+
+Read the phenotype file from disk. The expected format is Family ID (FID), Individual ID (IID), and the phenotype column `phenotype`. You may adjust the parsing configuration with keyword arguments that will be passed to `pandas.read_csv`.
+
+Parameters:
+    phenotype_file (required): The path to the phenotype file.
+    drop_na (default True): Drop samples whose phenotype value is missing.
+    read_csv_kwargs: Keyword arguments to pass to the `read_csv` function of `pandas`.
+
+Source code in magenpy/SampleTable.py:
def read_phenotype_file(self, phenotype_file, drop_na=True, **read_csv_kwargs):
+    """
+    Read the phenotype file from disk. The expected format is Family ID (`FID`),
+    Individual ID (`IID`) and the phenotype column `phenotype`. You may adjust
+    the parsing configurations with keyword arguments that will be passed to `pandas.read_csv`.
+
+    :param phenotype_file: The path to the phenotype file.
+    :param drop_na: Drop samples whose phenotype value is missing (Default: True).
+    :param read_csv_kwargs: keyword arguments to pass to the `read_csv` function of `pandas`.
+    """
+
+    if 'sep' not in read_csv_kwargs and 'delimiter' not in read_csv_kwargs:
+        read_csv_kwargs['sep'] = r'\s+'
+
+    if 'na_values' not in read_csv_kwargs:
+        read_csv_kwargs['na_values'] = {'phenotype': [-9.]}
+
+    if 'dtype' not in read_csv_kwargs:
+        read_csv_kwargs['dtype'] = {'phenotype': float}
+
+    pheno_table = pd.read_csv(phenotype_file, **read_csv_kwargs)
+    pheno_table.columns = ['FID', 'IID', 'phenotype']
+
+    if self.table is not None:
+        pheno_table['FID'] = pheno_table['FID'].astype(type(self.fid[0]))
+        pheno_table['IID'] = pheno_table['IID'].astype(type(self.iid[0]))
+
+        # Drop the phenotype column if it already exists
+        # (re-assign, since DataFrame.drop is not in-place by default):
+        if 'phenotype' in self.table.columns:
+            self.table = self.table.drop(columns=['phenotype'])
+
+        self.table = self.table.merge(pheno_table, on=['FID', 'IID'])
+    else:
+        self.table = pheno_table
+
+    if self.table['phenotype'].isnull().all():
+        self.table.drop('phenotype', axis=1, inplace=True)
+    elif drop_na:
+        # Maybe using converters in the read_csv above?
+        self.table = self.table.dropna(subset=['phenotype'])
+
+    self.post_check_phenotype()
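+
+A hedged example of overriding the parsing defaults (a comma-separated file with an extra missing-value code; the path and codes are illustrative):
+
+s_tab.read_phenotype_file('pheno.csv', sep=',', na_values={'phenotype': [-9., 999.]})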
+
+set_phenotype(phenotype, phenotype_likelihood=None)
+
+Update the phenotype in the sample table using the provided values.
+
+Parameters:
+    phenotype (required): The new phenotype values, represented by a numpy array or Iterable.
+    phenotype_likelihood (default None): The likelihood of the phenotype values.
+
+Source code in magenpy/SampleTable.py:
def set_phenotype(self, phenotype, phenotype_likelihood=None):
+    """
+    Update the phenotype in the sample table using the provided values.
+    :param phenotype: The new phenotype values, represented by a numpy array or Iterable.
+    :param phenotype_likelihood: The likelihood of the phenotype values.
+    """
+
+    self.table['phenotype'] = phenotype
+
+    if phenotype_likelihood:
+        self._phenotype_likelihood = phenotype_likelihood
+    else:
+        self.post_check_phenotype()
+
+to_file(output_file, col_subset=None, **to_csv_kwargs)
+
+Write the contents of the sample table to file.
+
+Parameters:
+    output_file (required): The path to the file where to write the sample table.
+    col_subset (default None): A subset of the columns to write to file.
+    to_csv_kwargs: Keyword arguments to pass to the `to_csv` function of `pandas`.
+
+Source code in magenpy/SampleTable.py:
def to_file(self, output_file, col_subset=None, **to_csv_kwargs):
+    """
+    Write the contents of the sample table to file.
+    :param output_file: The path to the file where to write the sample table.
+    :param col_subset: A subset of the columns to write to file.
+    :param to_csv_kwargs: keyword arguments to pass to the `to_csv` function of `pandas`.
+    """
+
+    assert self.table is not None
+
+    if 'sep' not in to_csv_kwargs and 'delimiter' not in to_csv_kwargs:
+        to_csv_kwargs['sep'] = '\t'
+
+    if 'index' not in to_csv_kwargs:
+        to_csv_kwargs['index'] = False
+
+    if col_subset is not None:
+        table = self.table[col_subset]
+    else:
+        table = self.table
+
+    table.to_csv(output_file, **to_csv_kwargs)
+
+to_table(col_subset=None)
+
+Get the sample table as a pandas DataFrame.
+
+Parameters:
+    col_subset (default None): A subset of the columns to include in the table.
+
+Returns: A pandas DataFrame with the sample information.
+
+Source code in magenpy/SampleTable.py:
def to_table(self, col_subset=None):
+    """
+    Get the sample table as a pandas DataFrame.
+
+    :param col_subset: A subset of the columns to include in the table.
+    :return: A pandas DataFrame with the sample information.
+    """
+    if col_subset is not None:
+        return self.table[list(col_subset)]
+    else:
+        return self.table
+
\ No newline at end of file
diff --git a/api/SumstatsTable/index.html b/api/SumstatsTable/index.html
new file mode 100644
index 0000000..c6af43e
--- /dev/null
+++ b/api/SumstatsTable/index.html
@@ -0,0 +1,5647 @@
+SumstatsTable - magenpy

SumstatsTable

+
+Bases: object
+
+A wrapper class for representing the summary statistics obtained from
+Genome-wide Association Studies (GWAS). GWAS software tools publish their
+results in the form of summary statistics, which include the SNP rsIDs,
+the effect/reference alleles tested, the marginal effect sizes (BETA),
+the standard errors (SE), the Z-scores, the p-values, etc.
+
+This class provides a convenient way to access/manipulate/harmonize these summary statistics
+across various formats. Particularly, given the heterogeneity in summary statistics
+formats, this class provides a common interface to access these statistics
+in a consistent manner. The class also supports computing some derived statistics
+from the summary statistics, such as the pseudo-correlation between the SNP and the
+phenotype, the Chi-squared statistics, etc.
+
+Attributes:
+    table (DataFrame): A pandas DataFrame containing the summary statistics.
+
+Source code in magenpy/SumstatsTable.py:
class SumstatsTable(object):
+    """
+    A wrapper class for representing the summary statistics obtained from
+    Genome-wide Association Studies (GWAS). GWAS software tools publish their
+    results in the form of summary statistics, which include the SNP rsIDs,
+    the effect/reference alleles tested, the marginal effect sizes (BETA),
+    the standard errors (SE), the Z-scores, the p-values, etc.
+
+    This class provides a convenient way to access/manipulate/harmonize these summary statistics
+    across various formats. Particularly, given the heterogeneity in summary statistics
+    formats, this class provides a common interface to access these statistics
+    in a consistent manner. The class also supports computing some derived statistics
+    from the summary statistics, such as the pseudo-correlation between the SNP and the
+    phenotype, the Chi-squared statistics, etc.
+
+    :ivar table: A pandas DataFrame containing the summary statistics.
+    """
+
+    def __init__(self, ss_table: pd.DataFrame):
+        """
+        Initialize the summary statistics table.
+
+        :param ss_table: A pandas DataFrame containing the summary statistics.
+
+        !!! seealso "See Also"
+            * [from_file][magenpy.SumstatsTable.SumstatsTable.from_file]
+        """
+        self.table: pd.DataFrame = ss_table
+
+        assert all([col in self.table.columns for col in ('SNP', 'A1')])
+
+    @property
+    def shape(self):
+        """
+        :return: The shape of the summary statistics table.
+        """
+        return self.table.shape
+
+    def __len__(self):
+        return len(self.table)
+
+    @property
+    def chromosome(self):
+        """
+        A convenience method to return the chromosome number if there is only one chromosome in the summary statistics.
+        If multiple chromosomes are present, it returns None.
+
+        :return: The chromosome number if there is only one chromosome in the summary statistics.
+        """
+        chrom = self.chromosomes
+        if chrom is not None and len(chrom) == 1:
+            return chrom[0]
+
+    @property
+    def chromosomes(self):
+        """
+        :return: The unique chromosomes in the summary statistics table.
+        """
+        if 'CHR' in self.table.columns:
+            return sorted(self.table['CHR'].unique())
+
+    @property
+    def m(self):
+        """
+        !!! seealso "See Also"
+            * [n_snps][magenpy.SumstatsTable.SumstatsTable.n_snps]
+
+        :return: The number of variants in the summary statistics table.
+        """
+        return self.n_snps
+
+    @property
+    def n_snps(self):
+        """
+        !!! seealso "See Also"
+            * [m][magenpy.SumstatsTable.SumstatsTable.m]
+
+        :return: The number of variants in the summary statistics table.
+        """
+        return len(self.table)
+
+    @property
+    def snps(self):
+        """
+        :return: The rsIDs associated with each variant in the summary statistics table.
+        """
+        return self.table['SNP'].values
+
+    @property
+    def a1(self):
+        """
+        !!! seealso "See Also"
+            * [effect_allele][magenpy.SumstatsTable.SumstatsTable.effect_allele]
+            * [alt_allele][magenpy.SumstatsTable.SumstatsTable.alt_allele]
+
+        :return: The alternative or effect allele for each variant in the summary statistics table.
+
+        """
+        return self.table['A1'].values
+
+    @property
+    def a2(self):
+        """
+        !!! seealso "See Also"
+            * [ref_allele][magenpy.SumstatsTable.SumstatsTable.ref_allele]
+
+        :return: The reference allele for each variant in the summary statistics table.
+        """
+        return self.get_col('A2')
+
+    @property
+    def ref_allele(self):
+        """
+        !!! seealso "See Also"
+            * [a2][magenpy.SumstatsTable.SumstatsTable.a2]
+
+        :return: The reference allele for each variant in the summary statistics table.
+        """
+        return self.a2
+
+    @property
+    def alt_allele(self):
+        """
+        !!! seealso "See Also"
+            * [effect_allele][magenpy.SumstatsTable.SumstatsTable.effect_allele]
+            * [a1][magenpy.SumstatsTable.SumstatsTable.a1]
+
+        :return: The alternative or effect allele for each variant in the summary statistics table.
+        """
+        return self.a1
+
+    @property
+    def effect_allele(self):
+        """
+        !!! seealso "See Also"
+            * [alt_allele][magenpy.SumstatsTable.SumstatsTable.alt_allele]
+            * [a1][magenpy.SumstatsTable.SumstatsTable.a1]
+
+        :return: The alternative or effect allele for each variant in the summary statistics table.
+        """
+        return self.a1
+
+    @property
+    def bp_pos(self):
+        """
+        :return: The base pair position for each variant in the summary statistics table.
+        """
+        return self.get_col('POS')
+
+    @property
+    def maf(self):
+        """
+        :return: The minor allele frequency for each variant in the summary statistics table.
+        """
+        return self.get_col('MAF')
+
+    @property
+    def maf_var(self):
+        """
+        :return: The variance of the minor allele frequency for each variant in the summary statistics table.
+        """
+        return 2.*self.maf*(1. - self.maf)
+
+    @property
+    def n(self):
+        """
+        !!! seealso "See Also"
+            * [n_per_snp][magenpy.SumstatsTable.SumstatsTable.n_per_snp]
+
+        :return: The sample size for the association test of each variant in the summary statistics table.
+        """
+        return self.get_col('N')
+
+    @property
+    def n_per_snp(self):
+        """
+        # TODO: Add a way to infer N from other sumstats if missing.
+
+        !!! seealso "See Also"
+            * [n][magenpy.SumstatsTable.SumstatsTable.n]
+
+        :return: The sample size for the association test of each variant in the summary statistics table.
+        """
+        return self.get_col('N')
+
+    @property
+    def beta_hat(self):
+        """
+        !!! seealso "See Also"
+            * [marginal_beta][magenpy.SumstatsTable.SumstatsTable.marginal_beta]
+
+        :return: The marginal beta from the association test of each variant on the phenotype.
+        """
+
+        beta = self.get_col('BETA')
+
+        if beta is None:
+            odds_ratio = self.odds_ratio
+            if odds_ratio is not None:
+                self.table['BETA'] = np.log(odds_ratio)
+                return self.table['BETA'].values
+        else:
+            return beta
+
+    @property
+    def marginal_beta(self):
+        """
+        !!! seealso "See Also"
+            * [beta_hat][magenpy.SumstatsTable.SumstatsTable.beta_hat]
+
+        :return: The marginal beta from the association test of each variant on the phenotype.
+        """
+        return self.beta_hat
+
+    @property
+    def odds_ratio(self):
+        """
+        :return: The odds ratio from the association test of each variant on case-control phenotypes.
+        """
+        return self.get_col('OR')
+
+    @property
+    def standardized_marginal_beta(self):
+        """
+        Get the marginal BETAs assuming that both the genotype matrix
+        and the phenotype vector are standardized column-wise to have mean zero and variance 1.
+        In some contexts, this is also known as the per-SNP correlation or
+        pseudo-correlation with the phenotype.
+
+        !!! seealso "See Also"
+            * [get_snp_pseudo_corr][magenpy.SumstatsTable.SumstatsTable.get_snp_pseudo_corr]
+
+        :return: The standardized marginal beta from the association test of each variant on the phenotype.
+        """
+        return self.get_snp_pseudo_corr()
+
+    @property
+    def z_score(self):
+        """
+        :return: The Z-score from the association test of each SNP on the phenotype.
+        :raises KeyError: If the Z-score statistic is not available and could not be inferred from available data.
+        """
+
+        z = self.get_col('Z')
+        if z is not None:
+            return z
+        else:
+            beta = self.beta_hat
+            se = self.se
+
+            if beta is not None and se is not None:
+                self.table['Z'] = beta / se
+                return self.table['Z'].values
+
+        raise KeyError("Z-score statistic is not available and could not be inferred from available data!")
+
+    @property
+    def standard_error(self):
+        """
+        !!! seealso "See Also"
+            * [se][magenpy.SumstatsTable.SumstatsTable.se]
+
+        :return: The standard error from the association test of each variant on the phenotype.
+
+        """
+        return self.get_col('SE')
+
+    @property
+    def se(self):
+        """
+        !!! seealso "See Also"
+            * [standard_error][magenpy.SumstatsTable.SumstatsTable.standard_error]
+
+        :return: The standard error from the association test of each variant on the phenotype.
+        """
+        return self.standard_error
+
+    @property
+    def pval(self):
+        """
+        !!! seealso "See Also"
+            * [p_value][magenpy.SumstatsTable.SumstatsTable.p_value]
+
+        :return: The p-value from the association test of each variant on the phenotype.
+        """
+        p = self.get_col('PVAL')
+
+        if p is not None:
+            return p
+        else:
+            from scipy import stats
+            self.table['PVAL'] = 2.*stats.norm.sf(np.abs(self.z_score))
+            return self.table['PVAL'].values
+
+    @property
+    def p_value(self):
+        """
+        !!! seealso "See Also"
+            * [pval][magenpy.SumstatsTable.SumstatsTable.pval]
+
+        :return: The p-value from the association test of each variant on the phenotype.
+        """
+        return self.pval
+
+    @property
+    def log10_p_value(self):
+        """
+        :return: The negative log10 of the p-value (-log10(p_value)) of the association
+        test of each variant on the phenotype.
+        """
+        return -np.log10(self.pval)
+
+    @property
+    def effect_sign(self):
+        """
+        :return: The sign for the effect size (1 for positive effect, -1 for negative effect)
+        of each genetic variant on the phenotype.
+
+        :raises KeyError: If the sign could not be inferred from available data.
+        """
+
+        signed_statistics = ['BETA', 'Z', 'OR']
+
+        for ss in signed_statistics:
+            ss_value = self.get_col(ss)
+            if ss_value is not None:
+                if ss == 'OR':
+                    return np.sign(np.log(ss_value))
+                else:
+                    return np.sign(ss_value)
+
+        raise KeyError("No signed statistic to extract the sign from!")
+
+    def infer_a2(self, reference_table, allow_na=False):
+        """
+        Infer the reference allele A2 (if not present in the SumstatsTable)
+        from a reference table. Make sure that the reference table contains the SNP ID,
+        the reference allele A2 and the alternative (i.e. effect) allele A1. It is the
+        user's responsibility to make sure that the reference table matches the summary
+        statistics in terms of the specification of reference vs. alternative. They are
+        allowed to be flipped, but they have to be consistent across the two tables.
+
+        :param reference_table: A pandas table containing the following columns at least:
+        `SNP`, `A1`, `A2`.
+        :param allow_na: If True, allow the reference allele to be missing from the final result.
+        """
+
+        # Merge the summary statistics table with the reference table on `SNP` ID:
+        merged_table = self.table[['SNP', 'A1']].merge(reference_table[['SNP', 'A1', 'A2']],
+                                                       how='left',
+                                                       on='SNP')
+        # If `A1_x` agrees with `A1_y`, then `A2` is indeed the reference allele.
+        # Otherwise, they are flipped and `A1_y` should be the reference allele:
+        merged_table['A2'] = np.where(merged_table['A1_x'] == merged_table['A1_y'],
+                                      merged_table['A2'],
+                                      merged_table['A1_y'])
+
+        # Check that the reference allele could be inferred for all SNPs:
+        if not allow_na and merged_table['A2'].isna().any():
+            raise ValueError("The reference allele could not be inferred for some SNPs!")
+        else:
+            self.table['A2'] = merged_table['A2']
+
+    def set_sample_size(self, n):
+        """
+        Set the sample size for each variant in the summary table.
+        This can be useful when the overall sample size from the GWAS analysis is available,
+        but not on a per-SNP basis.
+
+        :param n: A scalar or array of sample sizes for each variant.
+        """
+        self.table['N'] = n
+
+    def match(self, reference_table, correct_flips=True):
+        """
+        Match the summary statistics table with a reference table,
+        correcting for potential flips in the effect alleles.
+
+        :param reference_table: The SNP table to use as a reference. Must be a pandas
+        table with at least three columns: SNP, A1, A2.
+        :param correct_flips: If True, correct the direction of effect size
+         estimates if the effect allele is reversed.
+        """
+
+        from .utils.model_utils import merge_snp_tables
+
+        self.table = merge_snp_tables(ref_table=reference_table[['SNP', 'A1', 'A2']],
+                                      alt_table=self.table,
+                                      how='inner',
+                                      correct_flips=correct_flips)
+
+    def filter_by_allele_frequency(self, min_maf=None, min_mac=None):
+        """
+        Filter variants in the summary statistics table by minimum minor allele frequency or allele count
+        :param min_maf: Minimum minor allele frequency
+        :param min_mac: Minimum minor allele count
+        """
+
+        if min_mac or min_maf:
+            maf = self.maf
+            n = self.n_per_snp
+        else:
+            return
+
+        keep_flag = None
+
+        if min_mac and n is not None and maf is not None:
+            mac = (2*maf*n).astype(np.int64)
+            keep_flag = (mac >= min_mac) & ((2*n - mac) >= min_mac)
+
+        if min_maf and maf is not None:
+
+            maf_cond = (maf >= min_maf) & (1. - maf >= min_maf)
+            if keep_flag is not None:
+                keep_flag = keep_flag & maf_cond
+            else:
+                keep_flag = maf_cond
+
+        if keep_flag is not None:
+            self.filter_snps(extract_index=np.where(keep_flag)[0])
+
+    def filter_snps(self, extract_snps=None, extract_file=None, extract_index=None):
+        """
+        Filter the summary statistics table to keep a subset of SNPs.
+        :param extract_snps: A list or array of SNP IDs to keep.
+        :param extract_file: A plink-style file containing the SNP IDs to keep.
+        :param extract_index: A list or array of the indices of SNPs to retain.
+        """
+
+        assert extract_snps is not None or extract_file is not None or extract_index is not None
+
+        if extract_file:
+            from .parsers.misc_parsers import read_snp_filter_file
+            extract_snps = read_snp_filter_file(extract_file)
+
+        if extract_snps is not None:
+            extract_index = intersect_arrays(self.snps, extract_snps, return_index=True)
+
+        if extract_index is not None:
+            self.table = self.table.iloc[extract_index, ].reset_index(drop=True)
+        else:
+            raise Exception("To filter a summary statistics table, you must provide "
+                            "the list of SNPs, a file containing the list of SNPs, or a list of indices to retain.")
+
+    def drop_duplicates(self):
+        """
+        Drop variants with duplicated rsIDs from the summary statistics table.
+        """
+
+        self.table = self.table.drop_duplicates(subset='SNP', keep=False)
+
+    def get_col(self, col_name):
+        """
+        :param col_name: The name of the column to extract.
+
+        :return: The column associated with `col_name` from summary statistics table.
+        """
+        if col_name in self.table.columns:
+            return self.table[col_name].values
+
+    def get_chisq_statistic(self):
+        """
+        :return: The Chi-Squared statistic from the association test of each variant on the phenotype.
+        :raises KeyError: If the Chi-Squared statistic is not available and could not be inferred from available data.
+        """
+        chisq = self.get_col('CHISQ')
+
+        if chisq is not None:
+            return chisq
+        else:
+            z = self.z_score
+            if z is not None:
+                self.table['CHISQ'] = z**2
+            else:
+                p_val = self.p_value
+                if p_val is not None:
+                    from scipy.stats import chi2
+
+                    self.table['CHISQ'] = chi2.ppf(1. - p_val, 1)
+                else:
+                    raise KeyError("Chi-Squared statistic is not available!")
+
+        return self.table['CHISQ'].values
+
+    def get_snp_pseudo_corr(self):
+        """
+
+        Computes the pseudo-correlation coefficient (standardized beta) between the SNP and
+        the phenotype ($x_j^T y / N$) from GWAS summary statistics.
+
+        This method uses Equation 15 in Mak et al. (2017):
+
+            $$
+            \beta_j = \frac{z_j}{\sqrt{n - 1 + z_j^2}}
+            $$
+
+        Where $z_j$ is the marginal GWAS Z-score.
+
+        !!! seealso "See Also"
+            * [standardized_marginal_beta][magenpy.SumstatsTable.SumstatsTable.standardized_marginal_beta]
+
+        :return: The pseudo-correlation coefficient between the SNP and the phenotype.
+        :raises KeyError: If the Z-scores are not available or the sample size is not available.
+
+        """
+
+        zsc = self.z_score
+        n = self.n
+
+        if zsc is not None:
+            if n is not None:
+                return zsc / (np.sqrt(n - 1 + zsc**2))
+            else:
+                raise KeyError("Sample size is not available!")
+        else:
+            raise KeyError("Z-scores are not available!")
+
+    def get_yy_per_snp(self):
+        """
+        Computes the quantity (y'y)_j/n_j following SBayesR (Lloyd-Jones 2019) and Yang et al. (2012).
+
+        (y'y)_j/n_j is defined as the empirical variance for continuous phenotypes and may be estimated
+        from GWAS summary statistics by re-arranging the equation for the
+        squared standard error:
+
+            $$
+            SE(b_j)^2 = \frac{Var(y) - Var(x_j) \, b_j^2}{Var(x_j) \, n_j}
+            $$
+
+        Which gives the following estimate:
+
+            $$
+            (y'y)_j / n_j = (n_j - 2) \, SE(b_j)^2 + b_j^2
+            $$
+
+        :return: The quantity (y'y)_j/n_j for each SNP in the summary statistics table.
+        :raises KeyError: If the marginal betas, standard errors or sample sizes are not available.
+
+        """
+
+        b = self.beta_hat
+        se = self.standard_error
+        n = self.n
+
+        if n is not None:
+            if b is not None:
+                if se is not None:
+                    return (n - 2)*se**2 + b**2
+                else:
+                    raise KeyError("Standard errors are not available!")
+            else:
+                raise KeyError("Marginal betas are not available!")
+        else:
+            raise KeyError("Sample size per SNP is not available!")
+
+    def split_by_chromosome(self, snps_per_chrom=None):
+        """
+        Split the summary statistics table by chromosome, so that we would
+        have a separate `SumstatsTable` object for each chromosome.
+        :param snps_per_chrom: A dictionary where the keys are the chromosome number 
+        and the value is an array or list of SNPs on that chromosome.
+
+        :return: A dictionary where the keys are the chromosome number and the value is a `SumstatsTable` object.
+        """
+
+        if 'CHR' in self.table.columns:
+            chrom_tables = self.table.groupby('CHR')
+            return {
+                c: SumstatsTable(chrom_tables.get_group(c))
+                for c in chrom_tables.groups
+            }
+        elif snps_per_chrom is not None:
+            chrom_dict = {
+                c: SumstatsTable(pd.DataFrame({'SNP': snps}).merge(self.table))
+                for c, snps in snps_per_chrom.items()
+            }
+
+            for c, ss_tab in chrom_dict.items():
+                ss_tab.table['CHR'] = c
+
+            return chrom_dict
+        else:
+            raise Exception("To split the summary statistics table by chromosome, "
+                            "you must provide a dictionary mapping chromosome numbers "
+                            "to an array of SNPs `snps_per_chrom`.")
+
+    def to_table(self, col_subset=None):
+        """
+        A convenience method to extract the summary statistics table or subsets of it.
+
+        :param col_subset: A list corresponding to a subset of columns to return.
+
+        :return: A pandas DataFrame containing the summary statistics with the requested column subset.
+        """
+
+        col_subset = col_subset or ['CHR', 'SNP', 'POS', 'A1', 'A2', 'MAF',
+                                    'N', 'BETA', 'Z', 'SE', 'PVAL']
+
+        # Because some of the quantities that the user needs may need to be
+        # computed, we separate the column subset into those that are already
+        # present in the table and those that are not (but can still be computed
+        # from other summary statistics):
+
+        present_cols = list(set(col_subset).intersection(set(self.table.columns)))
+        non_present_cols = list(set(col_subset) - set(present_cols))
+
+        if len(present_cols) > 0:
+            table = self.table[present_cols].copy()
+        else:
+            table = pd.DataFrame({c: [] for c in non_present_cols})
+
+        for col in non_present_cols:
+
+            if col == 'Z':
+                table['Z'] = self.z_score
+            elif col == 'PVAL':
+                table['PVAL'] = self.p_value
+            elif col == 'LOG10_PVAL':
+                table['LOG10_PVAL'] = self.log10_p_value
+            elif col == 'CHISQ':
+                table['CHISQ'] = self.get_chisq_statistic()
+            elif col == 'MAF_VAR':
+                table['MAF_VAR'] = self.maf_var
+            elif col == 'STD_BETA':
+                table['STD_BETA'] = self.get_snp_pseudo_corr()
+            else:
+                warnings.warn(f"Column '{col}' is not available in the summary statistics table!")
+
+        return table[list(col_subset)]
+
+    def to_file(self, output_file, col_subset=None, **to_csv_kwargs):
+        """
+        A convenience method to write the summary statistics table to file.
+
+        TODO: Add a format argument to this method and allow the user to output summary statistics
+        according to supported formats (e.g. COJO, plink, fastGWA, etc.).
+
+        :param output_file: The path to the file where to write the summary statistics.
+        :param col_subset: A subset of the columns to write to file.
+        :param to_csv_kwargs: Keyword arguments to pass to pandas' `to_csv` method.
+
+        """
+
+        if 'sep' not in to_csv_kwargs and 'delimiter' not in to_csv_kwargs:
+            to_csv_kwargs['sep'] = '\t'
+
+        if 'index' not in to_csv_kwargs:
+            to_csv_kwargs['index'] = False
+
+        table = self.to_table(col_subset)
+        table.to_csv(output_file, **to_csv_kwargs)
+
+    @classmethod
+    def from_file(cls, sumstats_file, sumstats_format=None, parser=None, **parse_kwargs):
+        """
+        Initialize a summary statistics table from file. The user must provide either
+        the format for the summary statistics file or the parser object
+        (see `parsers.sumstats_parsers`).
+
+        :param sumstats_file: The path to the summary statistics file.
+        :param sumstats_format: The format for the summary statistics file. Currently,
+        we support the following summary statistics formats: `magenpy`, `plink1.9`, `plink` or `plink2`,
+        `COJO`, `fastGWA`, `SAIGE`, `GWASCatalog` (also denoted as `GWAS-SSF` and `SSF`).
+        :param parser: An instance of SumstatsParser parser, implements basic parsing/conversion
+        functionalities.
+        :param parse_kwargs: arguments for the pandas `read_csv` function, such as the delimiter.
+
+        :return: A `SumstatsTable` object initialized from the summary statistics file.
+        """
+        assert sumstats_format is not None or parser is not None
+
+        from .parsers.sumstats_parsers import (
+            SumstatsParser, Plink1SSParser, Plink2SSParser, COJOSSParser,
+            FastGWASSParser, SSFParser, SaigeSSParser
+        )
+
+        if parser is None:
+
+            sumstats_format_l = sumstats_format.lower()
+
+            if sumstats_format_l == 'magenpy':
+                parser = SumstatsParser(None, **parse_kwargs)
+            elif sumstats_format_l in ('plink', 'plink2'):
+                parser = Plink2SSParser(None, **parse_kwargs)
+            elif sumstats_format_l == 'plink1.9':
+                parser = Plink1SSParser(None, **parse_kwargs)
+            elif sumstats_format_l == 'cojo':
+                parser = COJOSSParser(None, **parse_kwargs)
+            elif sumstats_format_l == 'fastgwa':
+                parser = FastGWASSParser(None, **parse_kwargs)
+            elif sumstats_format_l in ('ssf', 'gwas-ssf', 'gwascatalog'):
+                parser = SSFParser(None, **parse_kwargs)
+            elif sumstats_format_l == 'saige':
+                parser = SaigeSSParser(None, **parse_kwargs)
+            else:
+                raise KeyError(f"Parsers for summary statistics format {sumstats_format} are not implemented!")
+
+        sumstats_table = parser.parse(sumstats_file)
+        return cls(sumstats_table)
+
+
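+
+As a usage illustration (not part of the generated docs; the file path is hypothetical and `SumstatsTable` is assumed to be importable from the top-level `magenpy` package):
+
+from magenpy import SumstatsTable  # assumed top-level import
+
+# Parse PLINK 2 association output into a harmonized table:
+ss = SumstatsTable.from_file('gwas.PHENO1.glm.linear', sumstats_format='plink2')
+
+print(ss.n_snps)                      # number of variants
+z = ss.z_score                        # computed as BETA / SE if a Z column is absent
+std_beta = ss.get_snp_pseudo_corr()   # z / sqrt(n - 1 + z^2)
+
+# Write a column subset to a tab-delimited file:
+ss.to_file('sumstats.tsv', col_subset=['CHR', 'SNP', 'A1', 'A2', 'Z', 'PVAL'])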
+
+a1 (property)
+See Also: effect_allele, alt_allele
+Returns: The alternative or effect allele for each variant in the summary statistics table.
+
+a2 (property)
+See Also: ref_allele
+Returns: The reference allele for each variant in the summary statistics table.
+
+alt_allele (property)
+See Also: effect_allele, a1
+Returns: The alternative or effect allele for each variant in the summary statistics table.
+
+beta_hat (property)
+See Also: marginal_beta
+Returns: The marginal beta from the association test of each variant on the phenotype.
+
+bp_pos (property)
+Returns: The base pair position for each variant in the summary statistics table.
+
+chromosome (property)
+A convenience method to return the chromosome number if there is only one chromosome in the summary statistics. If multiple chromosomes are present, it returns None.
+Returns: The chromosome number if there is only one chromosome in the summary statistics.
+
+chromosomes (property)
+Returns: The unique chromosomes in the summary statistics table.
+
+effect_allele (property)
+See Also: alt_allele, a1
+Returns: The alternative or effect allele for each variant in the summary statistics table.
+
+effect_sign (property)
+Returns: The sign for the effect size (1 for positive effect, -1 for negative effect) of each genetic variant on the phenotype.
+Raises: KeyError if the sign could not be inferred from available data.
+
+log10_p_value (property)
+Returns: The negative log10 of the p-value (-log10(p_value)) of the association test of each variant on the phenotype.
+
+m (property)
+See Also: n_snps
+Returns: The number of variants in the summary statistics table.
+
+maf (property)
+Returns: The minor allele frequency for each variant in the summary statistics table.
+
+maf_var (property)
+Returns: The variance of the minor allele frequency for each variant in the summary statistics table.
+
+marginal_beta (property)
+See Also: beta_hat
+Returns: The marginal beta from the association test of each variant on the phenotype.
+
+n (property)
+See Also: n_per_snp
+Returns: The sample size for the association test of each variant in the summary statistics table.
+
+n_per_snp (property)
+TODO: Add a way to infer N from other sumstats if missing.
+See Also: n
+Returns: The sample size for the association test of each variant in the summary statistics table.
+
+n_snps (property)
+See Also: m
+Returns: The number of variants in the summary statistics table.
+
+odds_ratio (property)
+Returns: The odds ratio from the association test of each variant on case-control phenotypes.
+
+p_value (property)
+See Also: pval
+Returns: The p-value from the association test of each variant on the phenotype.
+
+pval (property)
+See Also: p_value
+Returns: The p-value from the association test of each variant on the phenotype.
+
+ref_allele (property)
+See Also: a2
+Returns: The reference allele for each variant in the summary statistics table.
+
+se (property)
+See Also: standard_error
+Returns: The standard error from the association test of each variant on the phenotype.
+
+shape (property)
+Returns: The shape of the summary statistics table.
+
+snps (property)
+Returns: The rsIDs associated with each variant in the summary statistics table.
+
+standard_error (property)
+See Also: se
+Returns: The standard error from the association test of each variant on the phenotype.
+
+standardized_marginal_beta (property)
+Get the marginal BETAs assuming that both the genotype matrix and the phenotype vector are standardized column-wise to have mean zero and variance 1. In some contexts, this is also known as the per-SNP correlation or pseudo-correlation with the phenotype.
+See Also: get_snp_pseudo_corr
+Returns: The standardized marginal beta from the association test of each variant on the phenotype.
+
+z_score (property)
+Returns: The Z-score from the association test of each SNP on the phenotype.
+Raises: KeyError if the Z-score statistic is not available and could not be inferred from available data.
+
+__init__(ss_table)
+
+Initialize the summary statistics table.
+
+Parameters:
+    ss_table (DataFrame, required): A pandas DataFrame containing the summary statistics.
+
+See Also: from_file
+
+Source code in magenpy/SumstatsTable.py:
26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
def __init__(self, ss_table: pd.DataFrame):
+    """
+    Initialize the summary statistics table.
+
+    :param ss_table: A pandas DataFrame containing the summary statistics.
+
+    !!! seealso "See Also"
+        * [from_file][magenpy.SumstatsTable.SumstatsTable.from_file]
+    """
+    self.table: pd.DataFrame = ss_table
+
+    assert all([col in self.table.columns for col in ('SNP', 'A1')])
+
+
+
+ +
+ + +
+ + + +

drop_duplicates()

Drop variants with duplicated rsIDs from the summary statistics table.

Source code in magenpy/SumstatsTable.py

def drop_duplicates(self):
    """
    Drop variants with duplicated rsIDs from the summary statistics table.
    """

    self.table = self.table.drop_duplicates(subset='SNP', keep=False)

filter_by_allele_frequency(min_maf=None, min_mac=None)

Filter variants in the summary statistics table by minimum minor allele frequency or minimum minor allele count.

Parameters:

  • min_maf: Minimum minor allele frequency. Default: None
  • min_mac: Minimum minor allele count. Default: None

Source code in magenpy/SumstatsTable.py

def filter_by_allele_frequency(self, min_maf=None, min_mac=None):
    """
    Filter variants in the summary statistics table by minimum minor allele frequency or allele count
    :param min_maf: Minimum minor allele frequency
    :param min_mac: Minimum minor allele count
    """

    if min_mac or min_maf:
        maf = self.maf
        n = self.n_per_snp
    else:
        return

    keep_flag = None

    if min_mac and n and maf:
        mac = (2*maf*n).astype(np.int64)
        keep_flag = (mac >= min_mac) & ((2*n - mac) >= min_mac)

    if min_maf and maf:

        maf_cond = (maf >= min_maf) & (1. - maf >= min_maf)
        if keep_flag is not None:
            keep_flag = keep_flag & maf_cond
        else:
            keep_flag = maf_cond

    if keep_flag is not None:
        self.filter_snps(extract_index=np.where(keep_flag)[0])
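For example, the following hedged usage sketch (assuming ss is a SumstatsTable instance whose table contains MAF information) removes variants with minor allele frequency below 1%:

# Hedged usage sketch; `ss` is assumed to be a SumstatsTable
# whose table includes a MAF column:
ss.filter_by_allele_frequency(min_maf=0.01)
print(ss.n_snps)  # number of variants remaining after filtering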

filter_snps(extract_snps=None, extract_file=None, extract_index=None)

Filter the summary statistics table to keep a subset of SNPs.

Parameters:

  • extract_snps: A list or array of SNP IDs to keep. Default: None
  • extract_file: A plink-style file containing the SNP IDs to keep. Default: None
  • extract_index: A list or array of the indices of SNPs to retain. Default: None

Source code in magenpy/SumstatsTable.py

def filter_snps(self, extract_snps=None, extract_file=None, extract_index=None):
    """
    Filter the summary statistics table to keep a subset of SNPs.
    :param extract_snps: A list or array of SNP IDs to keep.
    :param extract_file: A plink-style file containing the SNP IDs to keep.
    :param extract_index: A list or array of the indices of SNPs to retain.
    """

    assert extract_snps is not None or extract_file is not None or extract_index is not None

    if extract_file:
        from .parsers.misc_parsers import read_snp_filter_file
        extract_snps = read_snp_filter_file(extract_file)

    if extract_snps is not None:
        extract_index = intersect_arrays(self.snps, extract_snps, return_index=True)

    if extract_index is not None:
        self.table = self.table.iloc[extract_index, ].reset_index(drop=True)
    else:
        raise Exception("To filter a summary statistics table, you must provide "
                        "the list of SNPs, a file containing the list of SNPs, or a list of indices to retain.")
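For example, a hedged sketch of keeping a subset of variants by rsID (the rsIDs below are hypothetical placeholders):

# Hedged usage sketch; variant IDs are hypothetical:
ss.filter_snps(extract_snps=['rs100', 'rs200', 'rs300'])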

from_file(sumstats_file, sumstats_format=None, parser=None, **parse_kwargs) (classmethod)

Initialize a summary statistics table from file. The user must provide either the format of the summary statistics file or a parser object (see parsers.sumstats_parsers).

Parameters:

  • sumstats_file (required): The path to the summary statistics file.
  • sumstats_format: The format of the summary statistics file. Currently, we support the following summary statistics formats: magenpy, plink1.9, plink or plink2, COJO, fastGWA, SAIGE, GWASCatalog (also denoted as GWAS-SSF and SSF). Default: None
  • parser: An instance of a SumstatsParser, which implements basic parsing/conversion functionality. Default: None
  • parse_kwargs: Keyword arguments for the pandas read_csv function, such as the delimiter. Default: {}

Returns:

A SumstatsTable object initialized from the summary statistics file.

Source code in magenpy/SumstatsTable.py

@classmethod
def from_file(cls, sumstats_file, sumstats_format=None, parser=None, **parse_kwargs):
    """
    Initialize a summary statistics table from file. The user must provide either
    the format for the summary statistics file or the parser object
    (see `parsers.sumstats_parsers`).

    :param sumstats_file: The path to the summary statistics file.
    :param sumstats_format: The format for the summary statistics file. Currently,
    we support the following summary statistics formats: `magenpy`, `plink1.9`, `plink` or `plink2`,
    `COJO`, `fastGWA`, `SAIGE`, `GWASCatalog` (also denoted as `GWAS-SSF` and `SSF`).
    :param parser: An instance of SumstatsParser parser, implements basic parsing/conversion
    functionalities.
    :param parse_kwargs: arguments for the pandas `read_csv` function, such as the delimiter.

    :return: A `SumstatsTable` object initialized from the summary statistics file.
    """
    assert sumstats_format is not None or parser is not None

    from .parsers.sumstats_parsers import (
        SumstatsParser, Plink1SSParser, Plink2SSParser, COJOSSParser,
        FastGWASSParser, SSFParser, SaigeSSParser
    )

    sumstats_format_l = sumstats_format.lower()

    if parser is None:
        if sumstats_format_l == 'magenpy':
            parser = SumstatsParser(None, **parse_kwargs)
        elif sumstats_format_l in ('plink', 'plink2'):
            parser = Plink2SSParser(None, **parse_kwargs)
        elif sumstats_format_l == 'plink1.9':
            parser = Plink1SSParser(None, **parse_kwargs)
        elif sumstats_format_l == 'cojo':
            parser = COJOSSParser(None, **parse_kwargs)
        elif sumstats_format_l == 'fastgwa':
            parser = FastGWASSParser(None, **parse_kwargs)
        elif sumstats_format_l in ('ssf', 'gwas-ssf', 'gwascatalog'):
            parser = SSFParser(None, **parse_kwargs)
        elif sumstats_format_l == 'saige':
            parser = SaigeSSParser(None, **parse_kwargs)
        else:
            raise KeyError(f"Parsers for summary statistics format {sumstats_format} are not implemented!")

    sumstats_table = parser.parse(sumstats_file)
    return cls(sumstats_table)
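For example, a hedged sketch of loading a plink2-formatted summary statistics file (the file path below is hypothetical):

from magenpy.SumstatsTable import SumstatsTable

# Load GWAS summary statistics generated by plink2 (hypothetical path):
ss = SumstatsTable.from_file('gwas_results.PHENO1.glm.linear',
                             sumstats_format='plink2')
print(ss.shape)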

get_chisq_statistic()

Returns:

The Chi-Squared statistic from the association test of each variant on the phenotype.

Raises:

KeyError: If the Chi-Squared statistic is not available and could not be inferred from available data.

Source code in magenpy/SumstatsTable.py

def get_chisq_statistic(self):
    """
    :return: The Chi-Squared statistic from the association test of each variant on the phenotype.
    :raises KeyError: If the Chi-Squared statistic is not available and could not be inferred from available data.
    """
    chisq = self.get_col('CHISQ')

    if chisq is not None:
        return chisq
    else:
        z = self.z_score
        if z is not None:
            self.table['CHISQ'] = z**2
        else:
            p_val = self.p_value
            if p_val is not None:
                from scipy.stats import chi2

                self.table['CHISQ'] = chi2.ppf(1. - p_val, 1)
            else:
                raise KeyError("Chi-Squared statistic is not available!")

    return self.table['CHISQ'].values

get_col(col_name)

Parameters:

  • col_name (required): The name of the column to extract.

Returns:

The column associated with col_name from the summary statistics table.

Source code in magenpy/SumstatsTable.py

def get_col(self, col_name):
    """
    :param col_name: The name of the column to extract.

    :return: The column associated with `col_name` from summary statistics table.
    """
    if col_name in self.table.columns:
        return self.table[col_name].values

get_snp_pseudo_corr()

Computes the pseudo-correlation coefficient (standardized beta) between each SNP and the phenotype (x_j'y / N) from GWAS summary statistics.

This method uses Equation 15 in Mak et al. (2017):

$$
\beta_j = \frac{z_j}{\sqrt{n - 1 + z_j^2}}
$$

where z_j is the marginal GWAS Z-score.

See Also

  • standardized_marginal_beta

Returns:

The pseudo-correlation coefficient between each SNP and the phenotype.

Raises:

KeyError: If the Z-scores are not available or the sample size is not available.

Source code in magenpy/SumstatsTable.py

def get_snp_pseudo_corr(self):
    """

    Computes the pseudo-correlation coefficient (standardized beta) between the SNP and
    the phenotype (X_jTy / N) from GWAS summary statistics.

    This method uses Equation 15 in Mak et al. 2017

        $$
        beta =  z_j / sqrt(n - 1 + z_j^2)
        $$

    Where `z_j` is the marginal GWAS Z-score.

    !!! seealso "See Also"
        * [standardized_marginal_beta][magenpy.SumstatsTable.SumstatsTable.standardized_marginal_beta]

    :return: The pseudo-correlation coefficient between the SNP and the phenotype.
    :raises KeyError: If the Z-scores are not available or the sample size is not available.

    """

    zsc = self.z_score
    n = self.n

    if zsc is not None:
        if n is not None:
            return zsc / (np.sqrt(n - 1 + zsc**2))
        else:
            raise KeyError("Sample size is not available!")
    else:
        raise KeyError("Z-scores are not available!")
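To make Equation 15 concrete, here is a hedged numerical sketch of the quantity this method computes (the Z-scores and sample size below are hypothetical):

import numpy as np

z = np.array([1.5, -2.0, 0.3])   # hypothetical marginal GWAS Z-scores
n = 50000                        # hypothetical GWAS sample size
std_beta = z / np.sqrt(n - 1 + z**2)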

get_yy_per_snp()

Computes the quantity (y'y)_j / n_j following SBayesR (Lloyd-Jones et al. 2019) and Yang et al. (2012).

(y'y)_j / n_j is defined as the empirical variance for continuous phenotypes and may be estimated from GWAS summary statistics by re-arranging the equation for the squared standard error:

$$
\mathrm{SE}(b_j)^2 = \frac{\mathrm{Var}(y) - \mathrm{Var}(x_j)\,b_j^2}{\mathrm{Var}(x_j)\,n_j}
$$

which gives the following estimate:

$$
(y'y)_j / n_j = (n_j - 2)\,\mathrm{SE}(b_j)^2 + b_j^2
$$

Returns:

The quantity (y'y)_j / n_j for each SNP in the summary statistics table.

Raises:

KeyError: If the marginal betas, standard errors or sample sizes are not available.

Source code in magenpy/SumstatsTable.py

def get_yy_per_snp(self):
    """
    Computes the quantity (y'y)_j/n_j following SBayesR (Lloyd-Jones 2019) and Yang et al. (2012).

    (y'y)_j/n_j is defined as the empirical variance for continuous phenotypes and may be estimated
    from GWAS summary statistics by re-arranging the equation for the
    squared standard error:

        $$
        SE(b_j)^2 = (Var(y) - Var(x_j)*b_j^2) / (Var(x)*n)
        $$

    Which gives the following estimate:

        $$
        (y'y)_j / n_j = (n_j - 2)*SE(b_j)^2 + b_j^2
        $$

    :return: The quantity (y'y)_j/n_j for each SNP in the summary statistics table.
    :raises KeyError: If the marginal betas, standard errors or sample sizes are not available.

    """

    b = self.beta_hat
    se = self.standard_error
    n = self.n

    if n is not None:
        if b is not None:
            if se is not None:
                return (n - 2)*se**2 + b**2
            else:
                raise KeyError("Standard errors are not available!")
        else:
            raise KeyError("Marginal betas are not available!")
    else:
        raise KeyError("Sample size per SNP is not available!")
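A hedged numerical sketch of the estimator above (all values are hypothetical):

import numpy as np

b = np.array([0.02, -0.01])      # hypothetical marginal effect sizes (betas)
se = np.array([0.004, 0.0035])   # hypothetical standard errors of the betas
n = np.array([45000, 47000])     # hypothetical per-SNP sample sizes
yy_per_snp = (n - 2) * se**2 + b**2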

infer_a2(reference_table, allow_na=False)

Infer the reference allele A2 (if not present in the SumstatsTable) from a reference table. Make sure that the reference table contains the SNP ID, the reference allele A2 and the alternative (i.e. effect) allele A1. It is the user's responsibility to make sure that the reference table matches the summary statistics in terms of the specification of reference vs. alternative. They are allowed to be flipped, but they have to be consistent across the two tables.

Parameters:

  • reference_table (required): A pandas table containing at least the following columns: SNP, A1, A2.
  • allow_na: If True, allow the reference allele to be missing from the final result. Default: False

Source code in magenpy/SumstatsTable.py

def infer_a2(self, reference_table, allow_na=False):
    """
    Infer the reference allele A2 (if not present in the SumstatsTable)
    from a reference table. Make sure that the reference table contains the SNP ID,
    the reference allele A2 and the alternative (i.e. effect) allele A1. It is the
    user's responsibility to make sure that the reference table matches the summary
    statistics in terms of the specification of reference vs. alternative. They are
    allowed to be flipped, but they have to be consistent across the two tables.

    :param reference_table: A pandas table containing the following columns at least:
    `SNP`, `A1`, `A2`.
    :param allow_na: If True, allow the reference allele to be missing from the final result.
    """

    # Merge the summary statistics table with the reference table on `SNP` ID:
    merged_table = self.table[['SNP', 'A1']].merge(reference_table[['SNP', 'A1', 'A2']],
                                                   how='left',
                                                   on='SNP')
    # If `A1_x` agrees with `A1_y`, then `A2` is indeed the reference allele.
    # Otherwise, they are flipped and `A1_y` should be the reference allele:
    merged_table['A2'] = np.where(merged_table['A1_x'] == merged_table['A1_y'],
                                  merged_table['A2'],
                                  merged_table['A1_y'])

    # Check that the reference allele could be inferred for all SNPs:
    if not allow_na and merged_table['A2'].isna().any():
        raise ValueError("The reference allele could not be inferred for some SNPs!")
    else:
        self.table['A2'] = merged_table['A2']

match(reference_table, correct_flips=True)

Match the summary statistics table with a reference table, correcting for potential flips in the effect alleles.

Parameters:

  • reference_table (required): The SNP table to use as a reference. Must be a pandas table with at least three columns: SNP, A1, A2.
  • correct_flips: If True, correct the direction of effect size estimates if the effect allele is reversed. Default: True

Source code in magenpy/SumstatsTable.py

def match(self, reference_table, correct_flips=True):
    """
    Match the summary statistics table with a reference table,
    correcting for potential flips in the effect alleles.

    :param reference_table: The SNP table to use as a reference. Must be a pandas
    table with at least three columns: SNP, A1, A2.
    :param correct_flips: If True, correct the direction of effect size
     estimates if the effect allele is reversed.
    """

    from .utils.model_utils import merge_snp_tables

    self.table = merge_snp_tables(ref_table=reference_table[['SNP', 'A1', 'A2']],
                                  alt_table=self.table,
                                  how='inner',
                                  correct_flips=correct_flips)
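For example, a hedged sketch of harmonizing the summary statistics against a reference SNP table (ref_df is assumed to be a pandas DataFrame with SNP, A1, and A2 columns):

# Hedged usage sketch; `ref_df` is a hypothetical reference SNP table:
ss.match(ref_df, correct_flips=True)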

set_sample_size(n)

Set the sample size for each variant in the summary table. This can be useful when the overall sample size from the GWAS analysis is available, but not on a per-SNP basis.

Parameters:

  • n (required): A scalar or array of sample sizes for each variant.

Source code in magenpy/SumstatsTable.py

def set_sample_size(self, n):
    """
    Set the sample size for each variant in the summary table.
    This can be useful when the overall sample size from the GWAS analysis is available,
    but not on a per-SNP basis.

    :param n: A scalar or array of sample sizes for each variant.
    """
    self.table['N'] = n
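For example, if only the overall GWAS sample size is reported:

# Hedged usage sketch; the sample size below is hypothetical:
ss.set_sample_size(50000)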

split_by_chromosome(snps_per_chrom=None)

Split the summary statistics table by chromosome, so that we would have a separate SumstatsTable object for each chromosome.

Parameters:

  • snps_per_chrom: A dictionary where the keys are the chromosome number and the value is an array or list of SNPs on that chromosome. Default: None

Returns:

A dictionary where the keys are the chromosome number and the value is a SumstatsTable object.

Source code in magenpy/SumstatsTable.py

def split_by_chromosome(self, snps_per_chrom=None):
    """
    Split the summary statistics table by chromosome, so that we would
    have a separate `SumstatsTable` object for each chromosome.
    :param snps_per_chrom: A dictionary where the keys are the chromosome number
    and the value is an array or list of SNPs on that chromosome.

    :return: A dictionary where the keys are the chromosome number and the value is a `SumstatsTable` object.
    """

    if 'CHR' in self.table.columns:
        chrom_tables = self.table.groupby('CHR')
        return {
            c: SumstatsTable(chrom_tables.get_group(c))
            for c in chrom_tables.groups
        }
    elif snps_per_chrom is not None:
        chrom_dict = {
            c: SumstatsTable(pd.DataFrame({'SNP': snps}).merge(self.table))
            for c, snps in snps_per_chrom.items()
        }

        for c, ss_tab in chrom_dict.items():
            ss_tab.table['CHR'] = c

        return chrom_dict
    else:
        raise Exception("To split the summary statistics table by chromosome, "
                        "you must provide a dictionary mapping chromosome number "
                        "to an array of SNPs `snps_per_chrom`.")
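For example, a hedged sketch (assuming the table contains a CHR column):

# Split into per-chromosome SumstatsTable objects:
chrom_tables = ss.split_by_chromosome()
# Keys are chromosome numbers; values are SumstatsTable objects.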

to_file(output_file, col_subset=None, **to_csv_kwargs)

A convenience method to write the summary statistics table to file.

TODO: Add a format argument to this method and allow the user to output summary statistics according to supported formats (e.g. COJO, plink, fastGWA, etc.).

Parameters:

  • output_file (required): The path to the file where to write the summary statistics.
  • col_subset: A subset of the columns to write to file. Default: None
  • to_csv_kwargs: Keyword arguments to pass to pandas' to_csv method. Default: {}

Source code in magenpy/SumstatsTable.py

def to_file(self, output_file, col_subset=None, **to_csv_kwargs):
    """
    A convenience method to write the summary statistics table to file.

    TODO: Add a format argument to this method and allow the user to output summary statistics
    according to supported formats (e.g. COJO, plink, fastGWA, etc.).

    :param output_file: The path to the file where to write the summary statistics.
    :param col_subset: A subset of the columns to write to file.
    :param to_csv_kwargs: Keyword arguments to pass to pandas' `to_csv` method.

    """

    if 'sep' not in to_csv_kwargs and 'delimiter' not in to_csv_kwargs:
        to_csv_kwargs['sep'] = '\t'

    if 'index' not in to_csv_kwargs:
        to_csv_kwargs['index'] = False

    table = self.to_table(col_subset)
    table.to_csv(output_file, **to_csv_kwargs)

to_table(col_subset=None)

A convenience method to extract the summary statistics table or subsets of it.

Parameters:

  • col_subset: A list corresponding to a subset of columns to return. Default: None

Returns:

A pandas DataFrame containing the summary statistics with the requested column subset.

Source code in magenpy/SumstatsTable.py

def to_table(self, col_subset=None):
    """
    A convenience method to extract the summary statistics table or subsets of it.

    :param col_subset: A list corresponding to a subset of columns to return.

    :return: A pandas DataFrame containing the summary statistics with the requested column subset.
    """

    col_subset = col_subset or ['CHR', 'SNP', 'POS', 'A1', 'A2', 'MAF',
                                'N', 'BETA', 'Z', 'SE', 'PVAL']

    # Because some of the quantities that the user needs may need to be
    # computed, we separate the column subset into those that are already
    # present in the table and those that are not (but can still be computed
    # from other summary statistics):

    present_cols = list(set(col_subset).intersection(set(self.table.columns)))
    non_present_cols = list(set(col_subset) - set(present_cols))

    if len(present_cols) > 0:
        table = self.table[present_cols].copy()
    else:
        table = pd.DataFrame({c: [] for c in non_present_cols})

    for col in non_present_cols:

        if col == 'Z':
            table['Z'] = self.z_score
        elif col == 'PVAL':
            table['PVAL'] = self.p_value
        elif col == 'LOG10_PVAL':
            table['LOG10_PVAL'] = self.log10_p_value
        elif col == 'CHISQ':
            table['CHISQ'] = self.get_chisq_statistic()
        elif col == 'MAF_VAR':
            table['MAF_VAR'] = self.maf_var
        elif col == 'STD_BETA':
            table['STD_BETA'] = self.get_snp_pseudo_corr()
        else:
            warnings.warn(f"Column '{col}' is not available in the summary statistics table!")

    return table[list(col_subset)]
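For example, a hedged sketch that extracts a column subset and writes the table to a tab-delimited file (the output path is hypothetical):

df = ss.to_table(col_subset=['CHR', 'SNP', 'A1', 'A2', 'Z'])
ss.to_file('harmonized_sumstats.tsv')  # writes tab-separated by default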

API Reference

Data Structures

  • GWADataLoader: A general class for loading multiple statistical genetics data sources and harmonizing them for downstream analyses.
  • GenotypeMatrix: A class for representing on-disk genotype matrices. It provides interfaces for querying, manipulating, and performing computations on genotype data.
  • LDMatrix: A class for representing on-disk Linkage-Disequilibrium (LD) matrices. It provides interfaces for querying, manipulating, and performing computations on LD data.
  • SampleTable: A class for representing data about samples (individuals), including covariates, phenotypes, and other sample-specific metadata.
  • SumstatsTable: A class for representing summary statistics data from a GWAS study. It provides interfaces for querying, manipulating, and performing computations on summary statistics data.
  • AnnotationMatrix: A class for representing variant annotations (e.g. functional annotations, pathogenicity scores, etc.) for a set of variants. It provides interfaces for querying, manipulating, and performing computations on annotation data.

Simulation

  • PhenotypeSimulator
  • MultiCohortPhenotypeSimulator
  • AnnotatedPhenotypeSimulator

Parsers

  • Sumstats Parsers: A collection of parsers for reading GWAS summary statistics files in various formats.
  • Annotation Parsers: A collection of parsers for reading variant annotation files in various formats.
  • Plink Parsers: A collection of parsers for reading PLINK files (BED/BIM/FAM) and other PLINK-related formats.

Statistics

  • GWA utilities (magenpy.stats.gwa.utils)
  • LD Score regression utilities (magenpy.stats.h2.ldsc)
  • LD estimators (magenpy.stats.ld.estimator) and LD utilities (magenpy.stats.ld.utils)
  • Scoring utilities (magenpy.stats.score.utils)
  • Genotype and phenotype transforms (magenpy.stats.transforms)
  • Variant utilities (magenpy.stats.variant.utils)

Plotting

  • GWAS plots: Functions for plotting various quantities / results from GWAS studies.
  • LD plots: Functions for plotting various quantities from LD matrices.

Utilities

  • Compute utilities: Utilities for computing various statistics / quantities over python data structures.
  • Data utilities: Utilities for downloading and processing relevant data.
  • Executors: A collection of classes for interfacing with third party software, such as plink.
  • Model utilities: Utilities for merging / aligning / filtering GWAS data sources.
  • System utilities: Utilities for interfacing with the system environment (e.g. file I/O, environment variables, etc.).

Data


Annotation parsers

AnnotationMatrixParser

Bases: object

A generic annotation matrix parser class.

Source code in magenpy/parsers/annotation_parsers.py

class AnnotationMatrixParser(object):
    """
    A generic annotation matrix parser class.
    """

    def __init__(self, col_name_converter=None, **read_csv_kwargs):
        """
        :param col_name_converter: A dictionary mapping column names
        in the original table to magenpy's column names for the various
        SNP features in the annotation matrix.
        :param read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.
        """

        self.col_name_converter = col_name_converter
        self.read_csv_kwargs = read_csv_kwargs

        # If the delimiter is not specified, assume whitespace by default:
        if 'sep' not in self.read_csv_kwargs and 'delimiter' not in self.read_csv_kwargs:
            self.read_csv_kwargs['sep'] = r'\s+'

    def parse(self, annotation_file, drop_na=True):
        """
        Parse the annotation matrix file
        :param annotation_file: The path to the annotation file.
        :param drop_na: Drop any entries with missing values.
        """

        try:
            df = pd.read_csv(annotation_file, **self.read_csv_kwargs)
        except Exception as e:
            raise e

        if drop_na:
            df = df.dropna()

        if self.col_name_converter is not None:
            df.rename(columns=self.col_name_converter, inplace=True)

        df.sort_values(['CHR', 'POS'], inplace=True)

        annotations = [ann for ann in df.columns if ann not in ('CHR', 'SNP', 'POS')]

        return df, annotations

__init__(col_name_converter=None, **read_csv_kwargs)

Parameters:

  • col_name_converter: A dictionary mapping column names in the original table to magenpy's column names for the various SNP features in the annotation matrix. Default: None
  • read_csv_kwargs: Keyword arguments to pass to pandas' read_csv. Default: {}

Source code in magenpy/parsers/annotation_parsers.py

def __init__(self, col_name_converter=None, **read_csv_kwargs):
    """
    :param col_name_converter: A dictionary mapping column names
    in the original table to magenpy's column names for the various
    SNP features in the annotation matrix.
    :param read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.
    """

    self.col_name_converter = col_name_converter
    self.read_csv_kwargs = read_csv_kwargs

    # If the delimiter is not specified, assume whitespace by default:
    if 'sep' not in self.read_csv_kwargs and 'delimiter' not in self.read_csv_kwargs:
        self.read_csv_kwargs['sep'] = r'\s+'

parse(annotation_file, drop_na=True)

Parse the annotation matrix file.

Parameters:

  • annotation_file (required): The path to the annotation file.
  • drop_na: Drop any entries with missing values. Default: True

Source code in magenpy/parsers/annotation_parsers.py

def parse(self, annotation_file, drop_na=True):
    """
    Parse the annotation matrix file
    :param annotation_file: The path to the annotation file.
    :param drop_na: Drop any entries with missing values.
    """

    try:
        df = pd.read_csv(annotation_file, **self.read_csv_kwargs)
    except Exception as e:
        raise e

    if drop_na:
        df = df.dropna()

    if self.col_name_converter is not None:
        df.rename(columns=self.col_name_converter, inplace=True)

    df.sort_values(['CHR', 'POS'], inplace=True)

    annotations = [ann for ann in df.columns if ann not in ('CHR', 'SNP', 'POS')]

    return df, annotations
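For example, a hedged sketch of parsing a whitespace-delimited annotation file (the file path and the 'BP' column name are hypothetical):

# Map a hypothetical 'BP' column to magenpy's 'POS' column name:
parser = AnnotationMatrixParser(col_name_converter={'BP': 'POS'})
df, annotations = parser.parse('annotations.txt')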
LDSCAnnotationMatrixParser

Bases: AnnotationMatrixParser

Source code in magenpy/parsers/annotation_parsers.py

class LDSCAnnotationMatrixParser(AnnotationMatrixParser):

    def __init__(self, col_name_converter=None, **read_csv_kwargs):
        """
        :param col_name_converter: A dictionary mapping column names
        in the original table to magenpy's column names for the various
        SNP features in the annotation matrix.
        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
        """

        super().__init__(col_name_converter, **read_csv_kwargs)
        self.col_name_converter = self.col_name_converter or {}
        self.col_name_converter.update(
            {
                'BP': 'POS'
            }
        )

    def parse(self, annotation_file, drop_na=True):
        """
        Parse the annotation matrix file
        :param annotation_file: The path to the annotation file.
        :param drop_na: Drop any entries with missing values.
        """

        df, annotations = super().parse(annotation_file, drop_na=drop_na)

        df = df.drop(['CM', 'base'], axis=1)
        annotations = [ann for ann in annotations if ann not in ('CM', 'base')]

        return df, annotations

__init__(col_name_converter=None, **read_csv_kwargs)

Parameters:

  • col_name_converter: A dictionary mapping column names in the original table to magenpy's column names for the various SNP features in the annotation matrix. Default: None
  • read_csv_kwargs: Keyword arguments to pass to pandas' read_csv. Default: {}

Source code in magenpy/parsers/annotation_parsers.py

def __init__(self, col_name_converter=None, **read_csv_kwargs):
    """
    :param col_name_converter: A dictionary mapping column names
    in the original table to magenpy's column names for the various
    SNP features in the annotation matrix.
    :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
    """

    super().__init__(col_name_converter, **read_csv_kwargs)
    self.col_name_converter = self.col_name_converter or {}
    self.col_name_converter.update(
        {
            'BP': 'POS'
        }
    )

parse(annotation_file, drop_na=True)

Parse the annotation matrix file.

Parameters:

  • annotation_file (required): The path to the annotation file.
  • drop_na: Drop any entries with missing values. Default: True

Source code in magenpy/parsers/annotation_parsers.py

def parse(self, annotation_file, drop_na=True):
    """
    Parse the annotation matrix file
    :param annotation_file: The path to the annotation file.
    :param drop_na: Drop any entries with missing values.
    """

    df, annotations = super().parse(annotation_file, drop_na=drop_na)

    df = df.drop(['CM', 'base'], axis=1)
    annotations = [ann for ann in annotations if ann not in ('CM', 'base')]

    return df, annotations

parse_annotation_bed_file(annot_bed_file)

Parse an annotation bed file in the format specified by Ensembl: https://uswest.ensembl.org/info/website/upload/bed.html

The file contains 3-12 columns, starting with Chromosome, start_coordinate, end_coordinate, etc. After reading the raw file, we let pandas infer whether the file has a header or not, and we standardize the names of the first 3 columns and convert the chromosome column into an integer.

Parameters:

  • annot_bed_file (str, required): The path to the annotation BED file.

Source code in magenpy/parsers/annotation_parsers.py

def parse_annotation_bed_file(annot_bed_file):
    """
    Parse an annotation bed file in the format specified by Ensemble:
    https://uswest.ensembl.org/info/website/upload/bed.html

    The file contains 3-12 columns, starting with Chromosome, start_coordinate, end_coordinate, etc.
    After reading the raw file, we let pandas infer whether the file has a header or not and we
    standardize the names of the first 3 columns and convert the chromosome column into an integer.

    :param annot_bed_file: The path to the annotation BED file.
    :type annot_bed_file: str
    """

    try:
        annot_bed = pd.read_csv(annot_bed_file, usecols=[0, 1, 2],
                                sep=r'\s+',
                                names=['CHR', 'Start', 'End'])
    except Exception as e:
        raise e

    annot_bed['CHR'] = annot_bed['CHR'].str.replace('chr', '').astype(int)

    return annot_bed

Misc parsers

parse_cluster_assignment_file(cluster_assignment_file)

Parses a file that maps each individual in the sample table to a cluster, and returns the pandas dataframe. The expected file should be whitespace-delimited and contain three columns: FID, IID, and Cluster.

Parameters:

  • cluster_assignment_file (str, required): The path to the cluster assignment file.

Source code in magenpy/parsers/misc_parsers.py

def parse_cluster_assignment_file(cluster_assignment_file):
    """
    Parses a file that maps each individual in the sample table to a cluster,
    and returns the pandas dataframe. The expected file should be whitespace delimited
    and contain three columns: FID, IID, and Cluster

    :param cluster_assignment_file: The path to the cluster assignment file.
    :type cluster_assignment_file: str
    """
    try:
        clusters = pd.read_csv(cluster_assignment_file, sep=r'\s+')
        clusters.columns = ['FID', 'IID', 'Cluster']
    except Exception as e:
        raise e

    return clusters

parse_ld_block_data(ldb_file_path)

This function takes a path to a file with the LD blocks and returns a dictionary with the chromosome ID and a list of the start and end positions for the blocks in that chromosome. The parser assumes that the LD block files have the ldetect format: https://bitbucket.org/nygcresearch/ldetect-data/src/master/

Parameters:

  • ldb_file_path (str, required): The path (or URL) to the LD blocks file.

Source code in magenpy/parsers/misc_parsers.py

def parse_ld_block_data(ldb_file_path):
    """
    This function takes a path to a file with the LD blocks
    and returns a dictionary with the chromosome ID and a list of the
    start and end positions for the blocks in that chromosome.
    The parser assumes that the LD block files have the ldetect format:
    https://bitbucket.org/nygcresearch/ldetect-data/src/master/

    :param ldb_file_path: The path (or URL) to the LD blocks file
    :type ldb_file_path: str
    """

    ld_blocks = {}

    df = pd.read_csv(ldb_file_path, sep=r'\s+')

    df = df.loc[(df.start != 'None') & (df.stop != 'None')]
    df = df.astype({'chr': str, 'start': np.int32, 'stop': np.int32})
    df = df.sort_values('start')

    if df.isnull().values.any():
        raise ValueError("The LD block data contains missing information. This may result in invalid "
                         "LD boundaries. Please fix the LD block files before continuing!")

    for chrom in df['chr'].unique():
        ld_blocks[int(chrom.replace('chr', ''))] = df.loc[df['chr'] == chrom, ['start', 'stop']].values

    return ld_blocks

read_sample_filter_file(filename)

Read a plink-style file listing sample IDs. The file should have no header, be tab-separated, and contain two columns corresponding to Family ID (FID) and Individual ID (IID). You may also pass a file with a single column of Individual IDs instead.

Parameters:

  • filename (str, required): The path to the file containing the sample IDs.

Source code in magenpy/parsers/misc_parsers.py

def read_sample_filter_file(filename):
    """
    Read plink-style file listing sample IDs.
    The file should not have a header, be tab-separated, and has two
    columns corresponding to Family ID (FID) and Individual ID (IID).
    You may also pass a file with a single-column of Individual IDs instead.

    :param filename: The path to the file containing the sample IDs
    :type filename: str
    """

    keep_list = pd.read_csv(filename, sep="\t", header=None).values

    if keep_list.shape[1] == 1:
        return keep_list[:, 0]
    elif keep_list.shape[1] == 2:
        return keep_list[:, 1]

read_snp_filter_file(filename, snp_id_col=0)

Read a plink-style file listing variant IDs. The file should have no header and contain a single column.

Parameters:

  • filename (str, required): The path to the file containing the SNP IDs.
  • snp_id_col (int): The column index containing the SNP IDs. Default: 0

Source code in magenpy/parsers/misc_parsers.py

def read_snp_filter_file(filename, snp_id_col=0):
    """
    Read plink-style file listing variant IDs.
    The file should not have a header and only has a single column.

    :param filename: The path to the file containing the SNP IDs
    :type filename: str
    :param snp_id_col: The column index containing the SNP IDs
    :type snp_id_col: int
    """

    try:
        keep_list = pd.read_csv(filename, sep="\t", header=None).values[:, snp_id_col]
    except Exception as e:
        raise e

    return keep_list
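For example, a hedged sketch (the file path is hypothetical):

# Read a single-column, headerless file of variant IDs to keep:
snps_to_keep = read_snp_filter_file('variants.keep')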

Plink parsers

parse_bim_file(plink_bfile)

From the plink documentation (https://www.cog-genomics.org/plink/1.9/formats#bim):

A text file with no header line, and one line per variant with the following six fields:

  • Chromosome code (either an integer, or 'X'/'Y'/'XY'/'MT'; '0' indicates unknown) or name
  • Variant identifier
  • Position in morgans or centimorgans (safe to use dummy value of '0')
  • Base-pair coordinate (1-based; limited to 2^31-2)
  • Allele 1 (corresponding to clear bits in .bed; usually minor)
  • Allele 2 (corresponding to set bits in .bed; usually major)

Parameters:

  • plink_bfile (str, required): The path to the plink bfile (with or without the extension).

Source code in magenpy/parsers/plink_parsers.py

def parse_bim_file(plink_bfile):
    """
    From the plink documentation:
    https://www.cog-genomics.org/plink/1.9/formats#bim

        A text file with no header line, and one line per variant with the following six fields:

        - Chromosome code (either an integer, or 'X'/'Y'/'XY'/'MT'; '0' indicates unknown) or name
        - Variant identifier
        - Position in morgans or centimorgans (safe to use dummy value of '0')
        - Base-pair coordinate (1-based; limited to 231-2)
        - Allele 1 (corresponding to clear bits in .bed; usually minor)
        - Allele 2 (corresponding to set bits in .bed; usually major)

    :param plink_bfile: The path to the plink bfile (with or without the extension).
    :type plink_bfile: str
    """

    if '.bim' not in plink_bfile:
        if '.bed' in plink_bfile:
            plink_bfile = plink_bfile.replace('.bed', '.bim')
        else:
            plink_bfile = plink_bfile + '.bim'

    bim_df = pd.read_csv(plink_bfile,
                         sep=r'\s+',
                         names=['CHR', 'SNP', 'cM', 'POS', 'A1', 'A2'],
                         dtype={
                             'CHR': int,
                             'SNP': str,
                             'cM': np.float32,
                             'POS': np.int32,
                             'A1': str,
                             'A2': str
                         })

    return bim_df
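For example, a hedged sketch (the bfile path is hypothetical):

# The path may be given with or without the .bim/.bed extension:
bim_df = parse_bim_file('genotypes/chr22')
print(bim_df[['CHR', 'SNP', 'POS']].head())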
parse_fam_file(plink_bfile)

From the plink documentation (https://www.cog-genomics.org/plink/1.9/formats#fam):

A text file with no header line, and one line per sample with the following six fields:

  • Family ID ('FID')
  • Within-family ID ('IID'; cannot be '0')
  • Within-family ID of father ('0' if father isn't in dataset)
  • Within-family ID of mother ('0' if mother isn't in dataset)
  • Sex code ('1' = male, '2' = female, '0' = unknown)
  • Phenotype value ('1' = control, '2' = case, '-9'/'0'/non-numeric = missing data if case/control)

Parameters:

  • plink_bfile (str, required): The path to the plink bfile (with or without the extension).

Source code in magenpy/parsers/plink_parsers.py

def parse_fam_file(plink_bfile):
    """
    From the plink documentation:
    https://www.cog-genomics.org/plink/1.9/formats#fam

        A text file with no header line, and one line per sample with the following six fields:

        - Family ID ('FID')
        - Within-family ID ('IID'; cannot be '0')
        - Within-family ID of father ('0' if father isn't in dataset)
        - Within-family ID of mother ('0' if mother isn't in dataset)
        - Sex code ('1' = male, '2' = female, '0' = unknown)
        - Phenotype value ('1' = control, '2' = case, '-9'/'0'/non-numeric = missing data if case/control)

    :param plink_bfile: The path to the plink bfile (with or without the extension).
    :type plink_bfile: str
    """

    if '.fam' not in plink_bfile:
        if '.bed' in plink_bfile:
            plink_bfile = plink_bfile.replace('.bed', '.fam')
        else:
            plink_bfile = plink_bfile + '.fam'

    fam_df = pd.read_csv(plink_bfile,
                         sep=r'\s+',
                         usecols=list(range(6)),
                         names=['FID', 'IID', 'fatherID', 'motherID', 'sex', 'phenotype'],
                         dtype={'FID': str,
                                'IID': str,
                                'fatherID': str,
                                'motherID': str,
                                'sex': np.float32,
                                'phenotype': np.float32
                                },
                         na_values={
                             'phenotype': [-9.],
                             'sex': [0]
                         })

    # If the phenotype is all null or unknown, drop the column:
    if fam_df['phenotype'].isnull().all():
        fam_df.drop('phenotype', axis=1, inplace=True)

    # If the sex column is all null or unknown, drop the column:
    if fam_df['sex'].isnull().all():
        fam_df.drop('sex', axis=1, inplace=True)

    return fam_df

Sumstats parsers

+ +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + +

+ COJOSSParser + + +

+ + +
+

+ Bases: SumstatsParser

+ + +

A specialized class for parsing GWAS summary statistics files generated by the COJO software.

+ + + + +

Attributes:

+ + + + + + + + + + + + + + + + + + + + +
NameTypeDescription
col_name_converter + +
+

A dictionary mapping column names in the original table to magenpy's column names.

+
+
read_csv_kwargs + +
+

Keyword arguments to pass to pandas' read_csv.

+
+
+ +
+ Source code in magenpy/parsers/sumstats_parsers.py +
class COJOSSParser(SumstatsParser):
+    """
+    A specialized class for parsing GWAS summary statistics files generated by the `COJO` software.
+
+    !!! seealso "See Also"
+        * [Plink2SSParser][magenpy.parsers.sumstats_parsers.Plink2SSParser]
+        * [Plink1SSParser][magenpy.parsers.sumstats_parsers.Plink1SSParser]
+        * [FastGWASSParser][magenpy.parsers.sumstats_parsers.FastGWASSParser]
+        * [SSFParser][magenpy.parsers.sumstats_parsers.SSFParser]
+        * [SaigeSSParser][magenpy.parsers.sumstats_parsers.SaigeSSParser]
+
+    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
+    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.
+    """
+
+    def __init__(self, col_name_converter=None, **read_csv_kwargs):
+        """
+
+        Initialize the COJO summary statistics parser.
+
+        :param col_name_converter: A dictionary/string mapping column names
+        in the original table to magenpy's column names for the various
+        summary statistics. If a string, it should be a comma-separated list of
+        key-value pairs (e.g. 'rsid=SNP,pos=POS').
+        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
+        """
+        super().__init__(col_name_converter, **read_csv_kwargs)
+
+        self.col_name_converter = self.col_name_converter or {}
+
+        self.col_name_converter.update(
+            {
+                'freq': 'MAF',
+                'b': 'BETA',
+                'se': 'SE',
+                'p': 'PVAL'
+            }
+        )
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+ __init__(col_name_converter=None, **read_csv_kwargs) + +

+ + +
+ +

Initialize the COJO summary statistics parser.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col_name_converter + +
+

A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS').

+
+
+ None +
read_csv_kwargs + +
+

Keyword arguments to pass to pandas' read_csv

+
+
+ {} +
+ +
+ Source code in magenpy/parsers/sumstats_parsers.py +
def __init__(self, col_name_converter=None, **read_csv_kwargs):
+    """
+
+    Initialize the COJO summary statistics parser.
+
+    :param col_name_converter: A dictionary/string mapping column names
+    in the original table to magenpy's column names for the various
+    summary statistics. If a string, it should be a comma-separated list of
+    key-value pairs (e.g. 'rsid=SNP,pos=POS').
+    :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
+    """
+    super().__init__(col_name_converter, **read_csv_kwargs)
+
+    self.col_name_converter = self.col_name_converter or {}
+
+    self.col_name_converter.update(
+        {
+            'freq': 'MAF',
+            'b': 'BETA',
+            'se': 'SE',
+            'p': 'PVAL'
+        }
+    )
+
+
+
+ +
+ + + +
+ +
+ + +
+ +
+ + + +

+ FastGWASSParser + + +

+ + +
+

+ Bases: SumstatsParser

+ + +

A specialized class for parsing GWAS summary statistics files generated by the FastGWA software.

+ + + + +

Attributes:

+ + + + + + + + + + + + + + + + + + + + +
NameTypeDescription
col_name_converter + +
+

A dictionary mapping column names in the original table to magenpy's column names.

+
+
read_csv_kwargs + +
+

Keyword arguments to pass to pandas' read_csv.

+
+
+ +
+ Source code in magenpy/parsers/sumstats_parsers.py +
class FastGWASSParser(SumstatsParser):
+    """
+    A specialized class for parsing GWAS summary statistics files generated by the `FastGWA` software.
+
+    !!! seealso "See Also"
+        * [Plink2SSParser][magenpy.parsers.sumstats_parsers.Plink2SSParser]
+        * [Plink1SSParser][magenpy.parsers.sumstats_parsers.Plink1SSParser]
+        * [COJOSSParser][magenpy.parsers.sumstats_parsers.COJOSSParser]
+        * [SSFParser][magenpy.parsers.sumstats_parsers.SSFParser]
+        * [SaigeSSParser][magenpy.parsers.sumstats_parsers.SaigeSSParser]
+
+    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
+    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.
+
+
+    """
+
+    def __init__(self, col_name_converter=None, **read_csv_kwargs):
+        """
+        :param col_name_converter: A dictionary/string mapping column names
+        in the original table to magenpy's column names for the various
+        summary statistics. If a string, it should be a comma-separated list of
+        key-value pairs (e.g. 'rsid=SNP,pos=POS').
+        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
+        """
+        super().__init__(col_name_converter, **read_csv_kwargs)
+
+        self.col_name_converter = self.col_name_converter or {}
+
+        self.col_name_converter.update(
+            {
+                'AF1': 'MAF',
+                'P': 'PVAL'
+            }
+        )
+
+
+ + + +
+ + + + + + + + + + +
+ + + +

+ __init__(col_name_converter=None, **read_csv_kwargs) + +

+ + +
+ + + + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
col_name_converter + +
+

A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS').

+
+
+ None +
read_csv_kwargs + +
+

Keyword arguments to pass to pandas' read_csv

+
+
+ {} +
+ +
+ Source code in magenpy/parsers/sumstats_parsers.py +
def __init__(self, col_name_converter=None, **read_csv_kwargs):
+    """
+    :param col_name_converter: A dictionary/string mapping column names
+    in the original table to magenpy's column names for the various
+    summary statistics. If a string, it should be a comma-separated list of
+    key-value pairs (e.g. 'rsid=SNP,pos=POS').
+    :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
+    """
+    super().__init__(col_name_converter, **read_csv_kwargs)
+
+    self.col_name_converter = self.col_name_converter or {}
+
+    self.col_name_converter.update(
+        {
+            'AF1': 'MAF',
+            'P': 'PVAL'
+        }
+    )
+
+
+
+ +
+ + + +
+ +
+ + +
+ +
+ + + +

+ Plink1SSParser + + +

+ + +
+

+ Bases: SumstatsParser

+ + +

A specialized class for parsing GWAS summary statistics files generated by plink1.9.

Attributes:

| Name | Description |
| --- | --- |
| `col_name_converter` | A dictionary mapping column names in the original table to magenpy's column names. |
| `read_csv_kwargs` | Keyword arguments to pass to pandas' `read_csv`. |

Source code in `magenpy/parsers/sumstats_parsers.py`:
class Plink1SSParser(SumstatsParser):
+    """
+    A specialized class for parsing GWAS summary statistics files generated by `plink1.9`.
+
+    !!! seealso "See Also"
+        * [Plink2SSParser][magenpy.parsers.sumstats_parsers.Plink2SSParser]
+        * [COJOSSParser][magenpy.parsers.sumstats_parsers.COJOSSParser]
+        * [FastGWASSParser][magenpy.parsers.sumstats_parsers.FastGWASSParser]
+        * [SSFParser][magenpy.parsers.sumstats_parsers.SSFParser]
+        * [SaigeSSParser][magenpy.parsers.sumstats_parsers.SaigeSSParser]
+
+    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
+    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.
+
+    """
+
+    def __init__(self, col_name_converter=None, **read_csv_kwargs):
+        """
+        Initialize the `plink1.9` summary statistics parser.
+
+        :param col_name_converter: A dictionary/string mapping column names
+        in the original table to magenpy's column names for the various
+        summary statistics. If a string, it should be a comma-separated list of
+        key-value pairs (e.g. 'rsid=SNP,pos=POS').
+        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
+        """
+
+        super().__init__(col_name_converter, **read_csv_kwargs)
+
+        self.col_name_converter = self.col_name_converter or {}
+
+        self.col_name_converter.update(
+            {
+                'P': 'PVAL',
+                'NMISS': 'N',
+                'STAT': 'Z',
+                'BP': 'POS'
+            }
+        )
+
__init__(col_name_converter=None, **read_csv_kwargs)

Initialize the `plink1.9` summary statistics parser.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `col_name_converter` | A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS'). | `None` |
| `read_csv_kwargs` | Keyword arguments to pass to pandas' `read_csv`. | `{}` |
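A similar sketch for `plink1.9` association output (the file name is hypothetical); `BP`, `NMISS`, `STAT`, and `P` are renamed to `POS`, `N`, `Z`, and `PVAL` automatically.

```python
from magenpy.parsers.sumstats_parsers import Plink1SSParser

parser = Plink1SSParser()
ss_df = parser.parse("height.assoc.linear")  # hypothetical file name
print(ss_df[['SNP', 'POS', 'N', 'Z', 'PVAL']].head())
```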
Plink2SSParser

Bases: `SumstatsParser`

A specialized class for parsing GWAS summary statistics files generated by plink2.

Attributes:

| Name | Description |
| --- | --- |
| `col_name_converter` | A dictionary mapping column names in the original table to magenpy's column names. |
| `read_csv_kwargs` | Keyword arguments to pass to pandas' `read_csv`. |

Source code in `magenpy/parsers/sumstats_parsers.py`:
class Plink2SSParser(SumstatsParser):
+    """
+    A specialized class for parsing GWAS summary statistics files generated by `plink2`.
+
+    !!! seealso "See Also"
+        * [Plink1SSParser][magenpy.parsers.sumstats_parsers.Plink1SSParser]
+        * [COJOSSParser][magenpy.parsers.sumstats_parsers.COJOSSParser]
+        * [FastGWASSParser][magenpy.parsers.sumstats_parsers.FastGWASSParser]
+        * [SSFParser][magenpy.parsers.sumstats_parsers.SSFParser]
+        * [SaigeSSParser][magenpy.parsers.sumstats_parsers.SaigeSSParser]
+
+    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
+    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.
+
+    """
+
+    def __init__(self, col_name_converter=None, **read_csv_kwargs):
+        """
+
+        Initialize the `plink2` summary statistics parser.
+
+        :param col_name_converter: A dictionary/string mapping column names
+        in the original table to magenpy's column names for the various
+        summary statistics. If a string, it should be a comma-separated list of
+        key-value pairs (e.g. 'rsid=SNP,pos=POS').
+        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
+        """
+
+        super().__init__(col_name_converter, **read_csv_kwargs)
+
+        self.col_name_converter = self.col_name_converter or {}
+
+        self.col_name_converter.update(
+            {
+                '#CHROM': 'CHR',
+                'ID': 'SNP',
+                'P': 'PVAL',
+                'OBS_CT': 'N',
+                'A1_FREQ': 'MAF',
+                'T_STAT': 'Z',
+                'Z_STAT': 'Z'
+            }
+        )
+
+    def parse(self, file_name, drop_na=True):
+        """
+        Parse a summary statistics file.
+        :param file_name: The path to the summary statistics file.
+        :param drop_na: Drop any entries with missing values.
+
+        :return: A pandas DataFrame containing the parsed summary statistics.
+        """
+
+        df = super().parse(file_name, drop_na=drop_na)
+
+        if 'A2' not in df.columns:
+            try:
+                if 'ALT1' in df.columns:
+                    df['A2'] = np.where(df['A1'] == df['ALT1'], df['REF'], df['ALT1'])
+                elif 'ALT' in df.columns:
+                    df['A2'] = np.where(df['A1'] == df['ALT'], df['REF'], df['ALT'])
+                else:
+                    warnings.warn("The reference allele A2 could not be inferred "
+                                  "from the summary statistics file!")
+            except KeyError:
+                warnings.warn("The reference allele A2 could not be inferred "
+                              "from the summary statistics file! Some of the columns needed to infer "
+                              "the A2 allele are missing or coded differently than what we expect.")
+
+        return df
+
__init__(col_name_converter=None, **read_csv_kwargs)

Initialize the `plink2` summary statistics parser.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `col_name_converter` | A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS'). | `None` |
| `read_csv_kwargs` | Keyword arguments to pass to pandas' `read_csv`. | `{}` |
parse(file_name, drop_na=True)

Parse a summary statistics file.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `file_name` | The path to the summary statistics file. | required |
| `drop_na` | Drop any entries with missing values. | `True` |

Returns: A pandas DataFrame containing the parsed summary statistics.
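A hedged sketch for `plink2` `.glm.linear` output (the file name is hypothetical). If the file lacks an `A2` column, `parse` attempts to infer it from the `REF`/`ALT` columns:

```python
from magenpy.parsers.sumstats_parsers import Plink2SSParser

parser = Plink2SSParser()
ss_df = parser.parse("height.PHENO1.glm.linear")  # hypothetical file name
# A2 is inferred from REF/ALT when not reported directly:
print(ss_df[['CHR', 'SNP', 'A1', 'A2', 'Z', 'PVAL']].head())
```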
SSFParser

Bases: `SumstatsParser`

A specialized class for parsing GWAS summary statistics that are formatted according to the standardized summary statistics format adopted by the GWAS Catalog. This format is sometimes denoted as `GWAS-SSF`.

Reference and details: https://github.com/EBISPOT/gwas-summary-statistics-standard
Attributes:

| Name | Description |
| --- | --- |
| `col_name_converter` | A dictionary mapping column names in the original table to magenpy's column names. |
| `read_csv_kwargs` | Keyword arguments to pass to pandas' `read_csv`. |

Source code in `magenpy/parsers/sumstats_parsers.py`:
class SSFParser(SumstatsParser):
+    """
+    A specialized class for parsing GWAS summary statistics that are formatted according
+     to the standardized summary statistics format adopted by the GWAS Catalog. This format is
+     sometimes denoted as `GWAS-SSF`.
+
+    Reference and details:
+    https://github.com/EBISPOT/gwas-summary-statistics-standard
+
+    !!! seealso "See Also"
+        * [Plink2SSParser][magenpy.parsers.sumstats_parsers.Plink2SSParser]
+        * [Plink1SSParser][magenpy.parsers.sumstats_parsers.Plink1SSParser]
+        * [COJOSSParser][magenpy.parsers.sumstats_parsers.COJOSSParser]
+        * [FastGWASSParser][magenpy.parsers.sumstats_parsers.FastGWASSParser]
+        * [SaigeSSParser][magenpy.parsers.sumstats_parsers.SaigeSSParser]
+
+    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
+    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.
+
+    """
+
+    def __init__(self, col_name_converter=None, **read_csv_kwargs):
+        """
+
+        Initialize the standardized summary statistics parser.
+
+        :param col_name_converter: A dictionary/string mapping column names
+        in the original table to magenpy's column names for the various
+        summary statistics. If a string, it should be a comma-separated list of
+        key-value pairs (e.g. 'rsid=SNP,pos=POS').
+        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
+        """
+
+        super().__init__(col_name_converter, **read_csv_kwargs)
+
+        self.col_name_converter = self.col_name_converter or {}
+
+        self.col_name_converter.update(
+            {
+                'chromosome': 'CHR',
+                'base_pair_location': 'POS',
+                'rsid': 'SNP',
+                'effect_allele': 'A1',
+                'other_allele': 'A2',
+                'beta': 'BETA',
+                'standard_error': 'SE',
+                'effect_allele_frequency': 'MAF',
+                'p_value': 'PVAL',
+                'n': 'N'
+            }
+        )
__init__(col_name_converter=None, **read_csv_kwargs)

Initialize the standardized summary statistics parser.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `col_name_converter` | A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS'). | `None` |
| `read_csv_kwargs` | Keyword arguments to pass to pandas' `read_csv`. | `{}` |
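A hedged sketch for a GWAS Catalog (GWAS-SSF) formatted file (the file name is hypothetical); the standard lower-case column names are mapped to magenpy's conventions:

```python
from magenpy.parsers.sumstats_parsers import SSFParser

parser = SSFParser(sep='\t')  # GWAS-SSF files are tab-separated
ss_df = parser.parse("GCST90000000.tsv")  # hypothetical accession file
print(ss_df[['CHR', 'POS', 'A1', 'A2', 'BETA', 'SE', 'PVAL']].head())
```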
SaigeSSParser

Bases: `SumstatsParser`

A specialized class for parsing GWAS summary statistics files generated by the `SAIGE` software. Reference and details: https://saigegit.github.io/SAIGE-doc/docs/single_step2.html

TODO: Ensure that the column names are correct across different trait types and that the inference of the sample size is correct.
Attributes:

| Name | Description |
| --- | --- |
| `col_name_converter` | A dictionary mapping column names in the original table to magenpy's column names. |
| `read_csv_kwargs` | Keyword arguments to pass to pandas' `read_csv`. |

Source code in `magenpy/parsers/sumstats_parsers.py`:
class SaigeSSParser(SumstatsParser):
+    """
+    A specialized class for parsing GWAS summary statistics files generated by the `SAIGE` software.
+    Reference and details:
+    https://saigegit.github.io/SAIGE-doc/docs/single_step2.html
+
+    TODO: Ensure that the column names are correct across different trait types
+    and the inference of the sample size is correct.
+
+    !!! seealso "See Also"
+        * [Plink2SSParser][magenpy.parsers.sumstats_parsers.Plink2SSParser]
+        * [Plink1SSParser][magenpy.parsers.sumstats_parsers.Plink1SSParser]
+        * [COJOSSParser][magenpy.parsers.sumstats_parsers.COJOSSParser]
+        * [FastGWASSParser][magenpy.parsers.sumstats_parsers.FastGWASSParser]
+        * [SSFParser][magenpy.parsers.sumstats_parsers.SSFParser]
+
+    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
+    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.
+
+    """
+
+    def __init__(self, col_name_converter=None, **read_csv_kwargs):
+        """
+        Initialize the `SAIGE` summary statistics parser.
+
+        :param col_name_converter: A dictionary/string mapping column names
+        in the original table to magenpy's column names for the various
+        summary statistics. If a string, it should be a comma-separated list of
+        key-value pairs (e.g. 'rsid=SNP,pos=POS').
+        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
+        """
+        super().__init__(col_name_converter, **read_csv_kwargs)
+
+        self.col_name_converter = self.col_name_converter or {}
+
+        # NOTE: SAIGE considers Allele2 to be the effect allele, so
+        # we switch their designation here:
+        self.col_name_converter.update(
+            {
+                'MarkerID': 'SNP',
+                'Allele1': 'A2',
+                'Allele2': 'A1',
+                'AF_Allele2': 'MAF',
+                'AC_Allele2': 'MAC',
+                'Tstat': 'Z',
+                'p.value': 'PVAL',
+            }
+        )
+
+    def parse(self, file_name, drop_na=True):
+        """
+        Parse the summary statistics file.
+        :param file_name: The path to the summary statistics file.
+        :param drop_na: Drop any entries with missing values.
+
+        :return: A pandas DataFrame containing the parsed summary statistics.
+        """
+
+        df = super().parse(file_name, drop_na=drop_na)
+
+        # Infer the sample size N
+        df['N'] = df['MAC'] / (2.*df['MAF'])
+
+        return df
__init__(col_name_converter=None, **read_csv_kwargs)

Initialize the `SAIGE` summary statistics parser.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `col_name_converter` | A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS'). | `None` |
| `read_csv_kwargs` | Keyword arguments to pass to pandas' `read_csv`. | `{}` |

parse(file_name, drop_na=True)

Parse the summary statistics file.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `file_name` | The path to the summary statistics file. | required |
| `drop_na` | Drop any entries with missing values. | `True` |

Returns: A pandas DataFrame containing the parsed summary statistics.
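A hedged sketch for SAIGE single-variant output (the file name is hypothetical). SAIGE reports `Allele2` as the effect allele, so the parser swaps the allele designations, and the sample size `N` is inferred from `MAC / (2 * MAF)`:

```python
from magenpy.parsers.sumstats_parsers import SaigeSSParser

parser = SaigeSSParser()
ss_df = parser.parse("saige_step2_output.txt")  # hypothetical file name
# N was inferred from the allele count (MAC) and frequency (MAF):
print(ss_df[['SNP', 'A1', 'A2', 'MAF', 'N', 'PVAL']].head())
```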
SumstatsParser

Bases: `object`
A wrapper class for parsing summary statistics files written by statistical genetics software for genome-wide association testing. A common challenge is that different tools output summary statistics in different formats and with different column names, so this class provides a common interface for parsing summary statistics files from different software tools and aims to make that process as seamless as possible.

The class is designed to be extensible, so that users can easily add new parsers for different software tools.

Attributes:

| Name | Description |
| --- | --- |
| `col_name_converter` | A dictionary mapping column names in the original table to magenpy's column names. |
| `read_csv_kwargs` | Keyword arguments to pass to pandas' `read_csv`. |

Source code in `magenpy/parsers/sumstats_parsers.py`:
class SumstatsParser(object):
+    """
+    A wrapper class for parsing summary statistics files that are written by statistical genetics software
+    for Genome-wide Association testing. A common challenge is the fact that different software tools
+    output summary statistics in different formats and with different column names. Thus, this class
+    provides a common interface for parsing summary statistics files from different software tools
+    and aims to make this process as seamless as possible.
+
+    The class is designed to be extensible, so that users can easily add new parsers for different software tools.
+
+    !!! seealso "See Also"
+        * [Plink2SSParser][magenpy.parsers.sumstats_parsers.Plink2SSParser]
+        * [Plink1SSParser][magenpy.parsers.sumstats_parsers.Plink1SSParser]
+        * [COJOSSParser][magenpy.parsers.sumstats_parsers.COJOSSParser]
+        * [FastGWASSParser][magenpy.parsers.sumstats_parsers.FastGWASSParser]
+        * [SSFParser][magenpy.parsers.sumstats_parsers.SSFParser]
+        * [SaigeSSParser][magenpy.parsers.sumstats_parsers.SaigeSSParser]
+
+    :ivar col_name_converter: A dictionary mapping column names in the original table to magenpy's column names.
+    :ivar read_csv_kwargs: Keyword arguments to pass to pandas' `read_csv`.
+
+    """
+
+    def __init__(self, col_name_converter=None, **read_csv_kwargs):
+        """
+        Initialize the summary statistics parser.
+
+        :param col_name_converter: A dictionary/string mapping column names
+        in the original table to magenpy's column names for the various
+        summary statistics. If a string, it should be a comma-separated list of
+        key-value pairs (e.g. 'rsid=SNP,pos=POS').
+        :param read_csv_kwargs: Keyword arguments to pass to pandas' read_csv
+        """
+
+        if isinstance(col_name_converter, str):
+            self.col_name_converter = {
+                k: v for entry in col_name_converter.split(',') for k, v in [entry.strip().split('=')]
+                if len(entry.strip()) > 0
+            }
+        else:
+            self.col_name_converter = col_name_converter
+
+        self.read_csv_kwargs = read_csv_kwargs
+
+        # If the delimiter is not specified, assume whitespace by default:
+        if 'sep' not in self.read_csv_kwargs and 'delimiter' not in self.read_csv_kwargs:
+            self.read_csv_kwargs['sep'] = r'\s+'
+
+    def parse(self, file_name, drop_na=True):
+        """
+        Parse a summary statistics file.
+        :param file_name: The path to the summary statistics file.
+        :param drop_na: If True, drop any entries with missing values.
+
+        :return: A pandas DataFrame containing the parsed summary statistics.
+        """
+
+        df = pd.read_csv(file_name, **self.read_csv_kwargs)
+
+        if drop_na:
+            df = df.dropna()
+
+        if self.col_name_converter is not None:
+            df.rename(columns=self.col_name_converter, inplace=True)
+
+        try:
+            df['POS'] = df['POS'].astype(np.int32)
+        except KeyError:
+            pass
+
+        return df
+
__init__(col_name_converter=None, **read_csv_kwargs)

Initialize the summary statistics parser.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `col_name_converter` | A dictionary/string mapping column names in the original table to magenpy's column names for the various summary statistics. If a string, it should be a comma-separated list of key-value pairs (e.g. 'rsid=SNP,pos=POS'). | `None` |
| `read_csv_kwargs` | Keyword arguments to pass to pandas' `read_csv`. | `{}` |

parse(file_name, drop_na=True)

Parse a summary statistics file.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `file_name` | The path to the summary statistics file. | required |
| `drop_na` | If True, drop any entries with missing values. | `True` |

Returns: A pandas DataFrame containing the parsed summary statistics.
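Since the base class accepts arbitrary `read_csv` keyword arguments and column mappings, it can handle custom formats directly. A hedged sketch (the file name and source column names are hypothetical):

```python
from magenpy.parsers.sumstats_parsers import SumstatsParser

# Map custom column names via a comma-separated string:
parser = SumstatsParser(col_name_converter='rsid=SNP,pos=POS,pval=PVAL', sep='\t')
ss_df = parser.parse("custom_sumstats.tsv")
print(ss_df.columns)
```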
\ No newline at end of file
diff --git a/api/plot/gwa/index.html b/api/plot/gwa/index.html
new file mode 100644
index 0000000..22d1647
--- /dev/null
+++ b/api/plot/gwa/index.html
@@ -0,0 +1,1211 @@

Gwa

manhattan(input_data, y=None, y_label=None, chrom_sep_color='#f0f0f0', snp_color='#808080', snp_marker='o', snp_alpha=0.3, add_bonf_line=True, bonf_line_color='#b06a7a')

Generate a Manhattan plot where the x-axis is the genomic position (in BP) and the y-axis is the -log10(p-value) or some other statistic of the user's choice.

TODO: Add functionality to highlight certain SNPs or markers on the plot.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `input_data` | `Union[GWADataLoader, SumstatsTable]` | An instance of `SumstatsTable` or `GWADataLoader` from which data about the positions of the SNPs will be extracted. | required |
| `y` | | An optional vector of values to plot on the y-axis. If not provided, the -log10(p-value) will be plotted by default. | `None` |
| `y_label` | | A label for the quantity or statistic that will be plotted on the y-axis. | `None` |
| `chrom_sep_color` | | The color for the chromosome separator block. | `'#f0f0f0'` |
| `snp_color` | | The color of the dots on the Manhattan plot. | `'#808080'` |
| `snp_marker` | | The shape of the marker on the Manhattan plot. | `'o'` |
| `snp_alpha` | | The opacity level for the markers. | `0.3` |
| `add_bonf_line` | | If True, add a line indicating the Bonferroni significance threshold. | `True` |
| `bonf_line_color` | | The color of the Bonferroni significance threshold line. | `'#b06a7a'` |

Source code in `magenpy/plot/gwa.py`:
def manhattan(input_data: Union[GWADataLoader, SumstatsTable],
+              y=None,
+              y_label=None,
+              chrom_sep_color='#f0f0f0',
+              snp_color='#808080',
+              snp_marker='o',
+              snp_alpha=0.3,
+              add_bonf_line=True,
+              bonf_line_color='#b06a7a'):
+
+    """
+    Generate Manhattan plot where the x-axis is the genomic position (in BP)
+    and the y-axis is the -log10(p-value) or some other statistic of the user's choice.
+
+    TODO: Add functionality to highlight certain SNPs or markers on the plot.
+
+    :param input_data: An instance of `SumstatsTable` or `GWADataLoader` from which data about the
+    positions of the SNPs will be extracted.
+    :param y: An optional vector of values to plot on the y-axis. If not provided, the -log10(p-value)
+    will be plotted by default.
+    :param y_label: A label for the quantity or statistic that will be plotted on the y-axis.
+    :param chrom_sep_color: The color for the chromosome separator block.
+    :param snp_color: The color of the dots on the Manhattan plot.
+    :param snp_marker: The shape of the marker on the Manhattan plot.
+    :param snp_alpha: The opacity level for the markers.
+    :param add_bonf_line: If True, add a line indicating the Bonferroni significance threshold.
+    :param bonf_line_color: The color of the Bonferroni significance threshold line.
+
+    """
+
+    if isinstance(input_data, SumstatsTable):
+        pos = {c: ss.bp_pos for c, ss in input_data.split_by_chromosome().items()}
+    elif isinstance(input_data, GWADataLoader):
+        pos = {c: ss.bp_pos for c, ss in input_data.sumstats_table.items()}
+    else:
+        raise ValueError("The input data must be an instance of `SumstatsTable` or `GWADataLoader`.")
+
+    starting_pos = 0
+    ticks = []
+    chrom_spacing = .1*min([p.max() - p.min() for c, p in pos.items()])
+
+    if y is None:
+        # If the values for the Y-axis are not provided,
+        # we assume that the user wishes to plot a standard Manhattan plot
+        # with -log10(p_value) on the Y-axis.
+
+        if add_bonf_line:
+            # Add bonferroni significance threshold line:
+            plt.axhline(-np.log10(0.05 / 1e6), ls='--', zorder=1, color=bonf_line_color)
+
+        if isinstance(input_data, SumstatsTable):
+            y = {c: ss.log10_p_value for c, ss in input_data.split_by_chromosome().items()}
+        else:
+            y = {c: ss.log10_p_value for c, ss in input_data.sumstats_table.items()}
+
+        y_label = "$-log_{10}$(p-value)"
+
+    unique_chr = sorted(list(pos.keys()))
+
+    for i, c in enumerate(unique_chr):
+
+        min_pos = pos[c].min()
+        max_pos = pos[c].max()
+
+        xmin = min_pos + starting_pos
+        xmax = max_pos + starting_pos
+        if i % 2 == 1:
+            plt.axvspan(xmin=xmin, xmax=xmax, zorder=0, color=chrom_sep_color)
+
+        ticks.append((xmin + xmax) / 2)
+
+        plt.scatter(pos[c] + starting_pos, y[c],
+                    c=snp_color, alpha=snp_alpha, label=None,
+                    marker=snp_marker)
+
+        #if hl_snps is not None:
+        #    plt.scatter((pos + starting_pos)[hl_snps[c]], y[c][hl_snps[c]],
+        #                c=hl_snp_color, alpha=snp_alpha, label=hl_snp_label,
+        #                marker=hl_snp_marker)
+
+        starting_pos += max_pos + chrom_spacing
+
+    plt.xticks(ticks, unique_chr)
+
+    plt.xlabel("Genomic Position")
+    plt.ylabel(y_label)
+
+    plt.tight_layout()
+
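A hedged usage sketch. The sample-data helpers (`tgp_eur_data_path`, `ukb_height_sumstats_path`) and the `GWADataLoader` keyword arguments are assumptions based on magenpy's bundled example data; substitute your own genotype and summary statistics paths as needed.

```python
import matplotlib.pyplot as plt
import magenpy as mgp
from magenpy.plot.gwa import manhattan

# Assumed sample-data helpers; replace with your own paths if unavailable:
gdl = mgp.GWADataLoader(mgp.tgp_eur_data_path(),
                        sumstats_files=mgp.ukb_height_sumstats_path(),
                        sumstats_format='fastGWA')

manhattan(gdl)  # plots -log10(p-value) by default
plt.show()
```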

qq_plot(input_data, statistic='p_value')

Generate a quantile-quantile (QQ) plot for the GWAS summary statistics. The function supports plotting QQ plots for the -log10(p-values) as well as the z-score (if available).

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `input_data` | `Union[GWADataLoader, SumstatsTable]` | An instance of `SumstatsTable` or `GWADataLoader` from which data about the positions of the SNPs will be extracted. | required |
| `statistic` | | The statistic to generate the QQ plot for. We currently support `p_value` and `z_score`. | `'p_value'` |

Source code in `magenpy/plot/gwa.py`:
def qq_plot(input_data: Union[GWADataLoader, SumstatsTable],
+            statistic='p_value'):
+    """
+    Generate a quantile-quantile (QQ) plot for the GWAS summary statistics.
+    The function supports plotting QQ plots for the -log10(p-values) as well as
+    the z-score (if available).
+
+    :param input_data: An instance of `SumstatsTable` or `GWADataLoader` from which data about the
+    positions of the SNPs will be extracted.
+    :param statistic: The statistic to generate the QQ plot for. We currently support `p_value` and `z_score`.
+    """
+
+    import scipy.stats as stats
+
+    if statistic == 'p_value':
+
+        if isinstance(input_data, SumstatsTable):
+            p_val = input_data.log10_p_value
+            m = input_data.m
+        elif isinstance(input_data, GWADataLoader):
+            p_val = np.concatenate([ss.log10_p_value for ss in input_data.sumstats_table.values()])
+            m = input_data.m
+        else:
+            raise ValueError("The input data must be an instance of `SumstatsTable` or `GWADataLoader`.")
+
+        plt.scatter(-np.log10(np.arange(1, m + 1) / m), np.sort(p_val)[::-1])
+
+        line = np.linspace(0., p_val.max() + 0.1*p_val.max())
+        plt.plot(line, line, c='red')
+        plt.xlabel("Expected $-log_{10}$(p-value)")
+        plt.ylabel("Observed $-log_{10}$(p-value)")
+
+    elif statistic == 'z_score':
+        if isinstance(input_data, SumstatsTable):
+            z_scs = input_data.z_score
+        elif isinstance(input_data, GWADataLoader):
+            z_scs = np.concatenate([ss.z_score for ss in input_data.sumstats_table.values()])
+        else:
+            raise ValueError("The input data must be an instance of `SumstatsTable` or `GWADataLoader`.")
+
+        stats.probplot(z_scs, dist="norm", plot=plt)
+        plt.show()
+    else:
+        raise NotImplementedError(f"No QQ plot can be generated for the statistic: {statistic}")
+
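A matching sketch for the QQ plot, reusing the (assumed) `gdl` object constructed in the `manhattan` example above:

```python
import matplotlib.pyplot as plt
from magenpy.plot.gwa import qq_plot

qq_plot(gdl, statistic='p_value')  # `gdl` as constructed above (assumption)
plt.show()
```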
\ No newline at end of file
diff --git a/api/plot/ld/index.html b/api/plot/ld/index.html
new file mode 100644
index 0000000..d0742bc
--- /dev/null
+++ b/api/plot/ld/index.html
@@ -0,0 +1,889 @@

Ld

plot_ld_matrix(ldm, row_subset=None, display='full', cmap='OrRd', include_colorbar=True)

Plot a heatmap representing the LD matrix or portions of it.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `ldm` | `LDMatrix` | An instance of `LDMatrix`. | required |
| `row_subset` | | A boolean or integer index array for the subset of rows/columns to extract from the LD matrix. | `None` |
| `display` | | A string indicating what part of the matrix to display. Can be 'full', 'upper', or 'lower'. If 'upper', only the upper triangle of the matrix will be displayed. If 'lower', only the lower triangle will be displayed. | `'full'` |
| `cmap` | | The color map for the LD matrix plot. | `'OrRd'` |
| `include_colorbar` | | If True, include a colorbar in the plot. | `True` |

Source code in `magenpy/plot/ld.py`:
def plot_ld_matrix(ldm: LDMatrix,
+                   row_subset=None,
+                   display='full',
+                   cmap='OrRd',
+                   include_colorbar=True):
+    """
+    Plot a heatmap representing the LD matrix or portions of it.
+
+    :param ldm: An instance of `LDMatrix`.
+    :param row_subset: A boolean or integer index array for the subset of rows/columns to extract from the LD matrix.
+    :param display: A string indicating what part of the matrix to display. Can be 'full', 'upper', 'lower'.
+    If upper, only the upper triangle of the matrix will be displayed.
+    If lower, only the lower triangle will be displayed.
+    :param cmap: The color map for the LD matrix plot.
+    :param include_colorbar: If True, include a colorbar in the plot.
+    """
+
+    if row_subset is None:
+        row_subset = np.arange(ldm.shape[0])
+
+    # TODO: Figure out a way to do this without loading the entire matrix:
+    ldm.load(return_symmetric=True, fill_diag=True, dtype='float32')
+
+    mat = ldm.csr_matrix[row_subset, :][:, row_subset].toarray()
+
+    if display == 'upper':
+        mat = np.triu(mat, k=1)
+    elif display == 'lower':
+        # k=-1 keeps the strictly lower triangle (excluding the diagonal),
+        # mirroring the k=1 behavior of the 'upper' case:
+        mat = np.tril(mat, k=-1)
+
+    plt.imshow(mat, cmap=cmap, vmin=-1., vmax=1.)
+
+    if include_colorbar:
+        plt.colorbar()
+
+    plt.axis('off')
+
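A hedged sketch: load an LD matrix from disk and plot its upper triangle. `LDMatrix.from_path` and the on-disk path are assumptions; adjust to however your LD matrices were computed and stored.

```python
import matplotlib.pyplot as plt
from magenpy import LDMatrix
from magenpy.plot.ld import plot_ld_matrix

ldm = LDMatrix.from_path("output/ld/chr_22")  # assumed reader and path
plot_ld_matrix(ldm, display='upper', include_colorbar=True)
plt.savefig("ld_chr22.png")
```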
\ No newline at end of file
diff --git a/api/simulation/AnnotatedPhenotypeSimulator/index.html b/api/simulation/AnnotatedPhenotypeSimulator/index.html
new file mode 100644
index 0000000..6063829
--- /dev/null
+++ b/api/simulation/AnnotatedPhenotypeSimulator/index.html
@@ -0,0 +1,1621 @@

AnnotatedPhenotypeSimulator

AnnotatedPhenotypeSimulator

Bases: `PhenotypeSimulator`

Simulate complex traits by incorporating genomic functional annotations into the mixture densities that govern the effect size of each variant on the trait.

Warning: This code is experimental and needs much further validation.

Source code in `magenpy/simulation/AnnotatedPhenotypeSimulator.py`:
class AnnotatedPhenotypeSimulator(PhenotypeSimulator):
+    """
+    Simulate complex traits by incorporating genomic functional
+    annotations into the mixture densities that govern the effect size
+    of each variant on the trait.
+
+    !!! warning
+        This code is experimental and needs much further validation.
+
+    """
+
+    def __init__(self, bed_files, **kwargs):
+        """
+        Create an instance of the AnnotatedPhenotypeSimulator class.
+
+        :param bed_files: A list of BED files that contain the genotype data.
+        :param kwargs: Additional keyword arguments for the PhenotypeSimulator class.
+        """
+
+        super().__init__(bed_files, **kwargs)
+
+        # For now, we will restrict to 2 mixture components.
+        assert self.n_components == 2
+
+        self.w_h2 = None  # The annotation weights for the per-SNP heritability
+        self.w_pi = None  # The annotation weights for the per-SNP causal probability
+
+    def set_w_h2(self, w_h2):
+        """
+        Set the annotation weights for the per SNP heritability
+        :param w_h2: A vector of weights for each annotation.
+        """
+
+        assert len(w_h2) == self.n_annotations
+
+        self.w_h2 = w_h2
+        self.set_per_snp_heritability()
+
+    def simulate_w_h2(self, enrichment=None):
+        """
+        Simulate the annotation weights for the per-SNP heritability
+        """
+        raise NotImplementedError
+
+    def set_w_pi(self, w_pi):
+        """
+        Set the annotation weights for the per SNP causal probability
+        :param w_pi: A vector of weights for each annotation.
+        """
+
+        assert len(w_pi) == self.n_annotations
+
+        self.w_pi = w_pi
+        self.set_per_snp_mixture_probability()
+
+    def simulate_w_pi(self, enrichment=None):
+        """
+        Simulate the annotation weights for the per-SNP causal probability
+
+        :param enrichment: A dictionary of enrichment values where the
+        key is the annotation and the value is the enrichment
+        """
+
+        enrichment = enrichment or {}
+        enr = []
+        for annot in self.annotation[self.chromosomes[0]].annotations:
+            try:
+                enr.append(enrichment[annot])
+            except KeyError:
+                enr.append(1.)
+
+        self.w_pi = np.log(np.array(enr))
+
+    def set_per_snp_heritability(self):
+        """
+        Set the per-SNP heritability values using the annotation weights.
+        """
+
+        if self.w_h2 is None:
+            return super().set_per_snp_heritability()
+
+        self.per_snp_h2 = {}
+
+        for c in self.chromosomes:
+            self.per_snp_h2[c] = np.clip(np.dot(self.annotation[c].values(), self.w_h2),
+                                         a_min=0., a_max=np.inf)
+
+    def set_per_snp_mixture_probability(self):
+        """
+        Set the per-SNP mixture probabilities using the annotation weights.
+        """
+
+        if self.w_pi is None:
+            return super().set_per_snp_mixture_probability()
+
+        self.per_snp_pi = {}
+
+        for c in self.chromosomes:
+            prob = 1./(1. + np.exp(-np.dot(self.annotation[c].values(add_intercept=True),
+                                           np.concatenate([[np.log(self.pi[1])], self.w_pi]))))
+            self.per_snp_pi[c] = np.array([1. - prob, prob]).T
+
+    def get_heritability_enrichment(self):
+        """
+        Estimate the enrichment of heritability per annotation.
+        """
+
+        tabs = self.to_true_beta_table(per_chromosome=True)
+        total_heritability = sum([tab['Heritability'].sum() for c, tab in tabs.items()])
+
+        heritability_per_binary_annot = {
+            bin_annot: 0. for bin_annot in self.annotation[self.chromosomes[0]].binary_annotations
+        }
+
+        n_variants_per_binary_annot = {
+            bin_annot: 0 for bin_annot in heritability_per_binary_annot
+        }
+
+        for c, c_size in self.shapes.items():
+            for bin_annot in self.annotation[c].binary_annotations:
+                annot_idx = self.annotation[c].get_binary_annotation_index(bin_annot)
+                heritability_per_binary_annot[bin_annot] += tabs[c].iloc[np.array(annot_idx), :]['Heritability'].sum()
+                n_variants_per_binary_annot[bin_annot] += len(annot_idx)
+
+        return {
+            bin_annot: (ba_h2/total_heritability) / (n_variants_per_binary_annot[bin_annot] / self.m)
+            for bin_annot, ba_h2 in heritability_per_binary_annot.items()
+        }
+
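A hedged sketch of biasing the causal-probability mixture with annotation enrichment. The constructor arguments (BED prefix and `annotation_files` keyword) and the 'Coding' annotation name are illustrative assumptions; consult the `GWADataLoader` API for how annotations are actually loaded.

```python
from magenpy.simulation.AnnotatedPhenotypeSimulator import AnnotatedPhenotypeSimulator

# Paths, keyword arguments, and the 'Coding' annotation are assumptions:
sim = AnnotatedPhenotypeSimulator("genotypes/chr_22",
                                  annotation_files="annotations/chr_22.annot")
sim.simulate_w_pi(enrichment={'Coding': 5.})  # 5x causal enrichment for 'Coding' SNPs
sim.set_per_snp_mixture_probability()         # propagate weights to per-SNP probabilities
sim.simulate()                                # simulate phenotypes under this architecture
```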
__init__(bed_files, **kwargs)

Create an instance of the AnnotatedPhenotypeSimulator class.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `bed_files` | A list of BED files that contain the genotype data. | required |
| `kwargs` | Additional keyword arguments for the PhenotypeSimulator class. | `{}` |

get_heritability_enrichment()

Estimate the enrichment of heritability per annotation.

set_per_snp_heritability()

Set the per-SNP heritability values using the annotation weights.

set_per_snp_mixture_probability()

Set the per-SNP mixture probabilities using the annotation weights.

set_w_h2(w_h2)

Set the annotation weights for the per-SNP heritability.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `w_h2` | A vector of weights for each annotation. | required |

set_w_pi(w_pi)

Set the annotation weights for the per-SNP causal probability.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `w_pi` | A vector of weights for each annotation. | required |

simulate_w_h2(enrichment=None)

Simulate the annotation weights for the per-SNP heritability.

simulate_w_pi(enrichment=None)

Simulate the annotation weights for the per-SNP causal probability.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `enrichment` | A dictionary of enrichment values where the key is the annotation and the value is the enrichment. | `None` |
\ No newline at end of file
diff --git a/api/simulation/MultiCohortPhenotypeSimulator/index.html b/api/simulation/MultiCohortPhenotypeSimulator/index.html
new file mode 100644
index 0000000..14d3f5d
--- /dev/null
+++ b/api/simulation/MultiCohortPhenotypeSimulator/index.html
@@ -0,0 +1,1254 @@

MultiCohortPhenotypeSimulator

MultiCohortPhenotypeSimulator

Bases: `GWADataLoader`
A module for simulating GWAS data for separate cohorts or clusters of the data. This includes scenarios such as multi-population or multi-ethnic datasets, or datasets that can be stratified by a discrete variable.

Warning: This code is experimental and needs much further validation.

Source code in `magenpy/simulation/MultiCohortPhenotypeSimulator.py`:
class MultiCohortPhenotypeSimulator(GWADataLoader):
+    """
+    A module for simulating GWAS data for separate cohorts or clusters of the data.
+    This includes scenarios such as multi-population or multi-ethnic datasets, or 
+    datasets that can be stratified by a discrete variable.
+
+    !!! warning
+        This code is experimental and needs much further validation.
+
+    """
+
+    def __init__(self,
+                 bed_files,
+                 cluster_assignments_file,
+                 prop_shared_causal=1.,
+                 rho=1.,
+                 **kwargs):
+        """
+        Simulate phenotypes using the linear additive model while accounting 
+        for heterogeneous genetic architectures across cohorts.
+
+        :param bed_files: A path (or list of paths) to PLINK BED files.
+        :param cluster_assignments_file: A file mapping each sample in the BED files to their corresponding 
+        cohort or cluster.
+        :param prop_shared_causal: Proportion of causal variants that are shared across clusters.
+        :param rho: The correlation coefficient for the effect size across clusters.
+        """
+
+        super().__init__(bed_files, **kwargs)
+
+        from ..parsers.misc_parsers import parse_cluster_assignment_file
+
+        self.cluster_table = parse_cluster_assignment_file(cluster_assignments_file)
+
+        # Proportion of causal snps that are shared
+        self.prop_shared_causal = prop_shared_causal
+
+        # Rho can be either a scalar or a matrix that determines the patterns of
+        # correlations between effect sizes in different clusters.
+        if np.issubdtype(type(rho), np.floating):
+            self.rho = rho*np.ones(shape=(len(self.clusters), len(self.clusters)))
+            np.fill_diagonal(self.rho, 1.)
+        else:
+            self.rho = rho
+
+        # Reference cluster
+        self.ref_cluster = None
+
+        # A dictionary of GWAS simulators for each cluster
+        self.cluster_simulators = {}
+
+        for c in self.clusters:
+            if self.ref_cluster is None:
+                self.ref_cluster = c
+
+            self.cluster_simulators[c] = PhenotypeSimulator(bed_files,
+                                                            keep_samples=self.get_samples_in_cluster(c),
+                                                            **kwargs)
+
+    @property
+    def clusters(self):
+        return self.cluster_table['Cluster'].unique()
+
+    def get_samples_in_cluster(self, cluster):
+        return self.cluster_table.loc[self.cluster_table['Cluster'] == cluster, 'IID'].values
+
+    def set_reference_cluster(self, c):
+        self.ref_cluster = c
+
+    def simulate_causal_status(self):
+
+        # The reference simulator:
+        ref_sim = self.cluster_simulators[self.ref_cluster]
+
+        # Simulate causal snps in reference cluster:
+        ref_sim.simulate_mixture_assignment()
+
+        # Get the causal snps in reference cluster:
+        ref_causal = {
+            c: np.where(a)[0]
+            for c, a in ref_sim.get_causal_status().items()
+        }
+
+        for c in self.clusters:
+            # For each cluster that is not the reference,
+            # update their causal snps according to our draw for
+            # the reference cluster
+            if c != self.ref_cluster:
+
+                new_mixture = ref_sim.mixture_assignment.copy()
+
+                if self.prop_shared_causal < 1.:
+                    for ch, ref_c in ref_causal.items():
+
+                        # The number of shared causal snps for Chromosome `ch`:
+                        n_shared_causal = int(np.floor(self.prop_shared_causal * len(ref_c)))
+
+                        # Number of snps to flip:
+                        n_flip = len(ref_c) - n_shared_causal
+
+                        # Randomly decide which snps to "turn off":
+                        for i in np.random.choice(ref_c, size=n_flip, replace=False):
+                            new_mixture[ch][i] = new_mixture[ch][i][::-1]
+                            # With probability p, switch on some other randomly chosen SNP:
+                            # NOTE: If the number of SNPs is small, there's a small chance
+                            # that this may flip the same SNP multiple times.
+                            if np.random.uniform() < ref_sim.pi[1]:
+                                new_i = np.random.choice(self.shapes[ch])
+                                new_mixture[ch][new_i] = new_mixture[ch][new_i][::-1]
+
+                self.cluster_simulators[c].set_mixture_assignment(
+                    new_mixture
+                )
+
+    def simulate_beta(self):
+
+        for c in self.clusters:
+            self.cluster_simulators[c].beta = {}
+
+        for ch, c_size in self.shapes.items():
+            # Draw the beta from a multivariate normal distribution with covariance
+            # as specified in the matrix `rho`.
+            betas = np.random.multivariate_normal(np.zeros(self.rho.shape[0]), cov=self.rho, size=c_size)
+            for i, c in enumerate(self.clusters):
+                self.cluster_simulators[c].beta[ch] = (
+                        self.cluster_simulators[c].get_causal_status()[ch].astype(np.int32)*betas[:, i]
+                )
+
+    def simulate(self, perform_gwas=False):
+
+        self.simulate_causal_status()
+        self.simulate_beta()
+
+        iids = self.sample_table.iid
+
+        # Use a float-valued series; `np.zeros_like(iids)` would inherit the
+        # (string) dtype of the sample IDs.
+        phenotypes = pd.Series(np.zeros(len(iids), dtype=float), index=iids)
+
+        for c in self.clusters:
+            self.cluster_simulators[c].simulate(reset_beta=False)
+            phenotypes[self.cluster_simulators[c].sample_table.iid] = self.cluster_simulators[c].sample_table.phenotype
+
+        self.set_phenotype(phenotypes)
+
+        # Perform GWAS on the pooled sample:
+        if perform_gwas:
+            self.perform_gwas()
+
+
+ __init__(bed_files, cluster_assignments_file, prop_shared_causal=1.0, rho=1.0, **kwargs)

+ Simulate phenotypes using the linear additive model while accounting for heterogeneous genetic architectures across cohorts.

+ Parameters:
+ | Name | Description | Default |
+ | --- | --- | --- |
+ | bed_files | A path (or list of paths) to PLINK BED files. | required |
+ | cluster_assignments_file | A file mapping each sample in the BED files to its corresponding cohort or cluster. | required |
+ | prop_shared_causal | Proportion of causal variants that are shared across clusters. | 1.0 |
+ | rho | The correlation coefficient for the effect size across clusters. | 1.0 |
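
+ For orientation, here is a minimal usage sketch. The file paths below are hypothetical, and `rho` may alternatively be a cluster-by-cluster correlation matrix, as handled in the constructor above.

+    import numpy as np
+    from magenpy.simulation.MultiCohortPhenotypeSimulator import MultiCohortPhenotypeSimulator
+
+    # Hypothetical inputs: PLINK BED file(s) and a sample-to-cluster assignment file.
+    sim = MultiCohortPhenotypeSimulator(
+        "data/chr22.bed",                # hypothetical path
+        "data/cluster_assignments.txt",  # hypothetical path
+        prop_shared_causal=0.9,          # 90% of causal variants shared across clusters
+        rho=0.8                          # cross-cluster effect size correlation
+    )
+
+    # Draw causal variants, cluster-specific effect sizes, and phenotypes:
+    sim.simulate()
+
+    # The pooled, simulated phenotypes are stored on the sample table:
+    print(sim.sample_table.phenotype[:5])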
diff --git a/api/simulation/PhenotypeSimulator/index.html b/api/simulation/PhenotypeSimulator/index.html
new file mode 100644
index 0000000..c44125b
--- /dev/null
+++ b/api/simulation/PhenotypeSimulator/index.html
@@ -0,0 +1,3184 @@

PhenotypeSimulator

+ PhenotypeSimulator

+ Bases: GWADataLoader

+ A wrapper class that supports simulating complex traits with a variety of genetic architectures and heritability values, using the standard linear model. The basic implementation supports simulating effect sizes from a sparse Gaussian mixture density, allowing some variants to have larger effects than others. The class also supports simulating binary (case-control) phenotypes by thresholding the continuous liability at a cutoff determined by the specified disease prevalence.

+ To be concrete, the generative model for the simulation is as follows:

+ 1) Simulate the mixture assignment for each variant based on the mixing proportions pi.
+ 2) Simulate the effect sizes for each variant from the Gaussian density of its assigned mixture component.
+ 3) Compute the polygenic score for each individual based on the simulated effect sizes.
+ 4) Simulate the residual component of the phenotype in such a way that the total heritability is preserved.

+ See Also: GWADataLoader

+ Attributes:

+ | Name | Description |
+ | --- | --- |
+ | pi | The mixing proportions for the Gaussian mixture density. |
+ | h2 | The trait SNP heritability, or proportion of variance explained by SNPs. |
+ | d | The variance multipliers for each component of the Gaussian mixture density. |
+ | prevalence | The (disease) prevalence for binary (case-control) phenotypes. |
+ | per_snp_h2 | The per-SNP heritability for each variant in the dataset. |
+ | per_snp_pi | The per-SNP mixing proportions for each variant in the dataset. |
+ | beta | The effect sizes for each variant in the dataset. |
+ | mixture_assignment | The assignment of each variant to a mixture component. |
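
+ As a sketch of the four-step generative model above, the following simplified numpy pseudo-implementation may help fix ideas. It is illustrative only (one flat variant array rather than the class's per-chromosome dictionaries):

+    import numpy as np
+
+    n, m = 1000, 5000          # samples, variants (toy sizes)
+    pi = np.array([0.9, 0.1])  # mixing proportions (null, causal)
+    d = np.array([0., 1.])     # variance multipliers per component
+    h2 = 0.2                   # target SNP heritability
+
+    # 1) Mixture assignment for each variant:
+    assignment = np.random.choice(len(pi), size=m, p=pi)
+
+    # 2) Effect sizes, with the global scale chosen so the causal variance sums to h2:
+    sigma_beta_sq = h2 / d[assignment].sum()
+    beta = np.random.normal(0., np.sqrt(sigma_beta_sq * d[assignment]))
+
+    # 3) Polygenic score from a standardized genotype matrix (simulated here):
+    X = np.random.normal(size=(n, m))
+    pgs = X @ beta
+
+    # 4) Residual component chosen to preserve the total heritability:
+    y = pgs + np.random.normal(0., np.sqrt(1. - h2), size=n)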
+ Source code in magenpy/simulation/PhenotypeSimulator.py
class PhenotypeSimulator(GWADataLoader):
+    """
+    A wrapper class that supports simulating complex traits with a variety of
+    genetic architectures and heritability values, using the standard linear model. The
+    basic implementation supports simulating effect sizes from a sparse Gaussian mixture density,
+    allowing some variants to have larger effects than others. The class also supports simulating
+    binary phenotypes (case-control) by thresholding the continuous liability at a cutoff
+    determined by the specified disease prevalence.
+
+    To be concrete, the generative model for the simulation is as follows:
+
+    1) Simulate the mixture assignment for each variant based on the mixing proportions `pi`.
+    2) Simulate the effect sizes for each variant from the Gaussian density of its assigned mixture component.
+    3) Compute the polygenic score for each individual based on the simulated effect sizes.
+    4) Simulate the residual component of the phenotype, in such a way that the total heritability is preserved.
+
+    !!! seealso "See Also"
+        * [GWADataLoader][magenpy.GWADataLoader.GWADataLoader]
+
+    :ivar pi: The mixing proportions for the Gaussian mixture density.
+    :ivar h2: The trait SNP heritability, or proportion of variance explained by SNPs.
+    :ivar d: The variance multipliers for each component of the Gaussian mixture density.
+    :ivar prevalence: The (disease) prevalence for binary (case-control) phenotypes.
+    :ivar per_snp_h2: The per-SNP heritability for each variant in the dataset.
+    :ivar per_snp_pi: The per-SNP mixing proportions for each variant in the dataset.
+    :ivar beta: The effect sizes for each variant in the dataset.
+    :ivar mixture_assignment: The assignment of each variant to a mixture component.
+
+    """
+
+    def __init__(self,
+                 bed_files,
+                 h2=0.2,
+                 pi=0.1,
+                 d=(0., 1.),
+                 prevalence=0.15,
+                 **kwargs):
+        """
+        Initialize the PhenotypeSimulator object with the necessary parameters.
+
+        :param bed_files: A path (or list of paths) to PLINK BED files containing the genotype information.
+        :param h2: The trait SNP heritability, or proportion of variance explained by SNPs.
+        :param pi: The mixing proportions for the mixture of Gaussians (our model for the distribution of effect sizes).
+        If a float is provided, it is converted to a tuple (1-pi, pi), where pi is the proportion of causal variants.
+        :param d:  The variance multipliers for each component of the Gaussian mixture density. By default,
+        all components have the same variance multiplier.
+        :param prevalence: The (disease) prevalence for binary (case-control) phenotypes.
+        """
+
+        super().__init__(bed_files, **kwargs)
+
+        # If pi is float, convert it to a tuple:
+        if isinstance(pi, float):
+            pi = (1. - pi, pi)
+
+        self.pi = pi
+        self.h2 = h2
+        self.prevalence = prevalence
+
+        # Sanity checks:
+        assert 0. <= self.h2 <= 1.
+        assert round(sum(self.pi), 1) == 1.
+        assert 0. < self.prevalence < 1.
+
+        self.d = np.array(d)
+
+        self.per_snp_h2 = None
+        self.per_snp_pi = None
+        self.beta = None
+        self.mixture_assignment = None
+
+    @property
+    def n_components(self):
+        """
+        :return: The number of Gaussian mixture components for the effect size distribution.
+        """
+        return len(self.pi)
+
+    def set_pi(self, new_pi):
+        """
+        Set the mixture proportions (proportion of variants in each
+        Gaussian mixture component).
+        """
+        self.pi = new_pi
+        self.set_per_snp_mixture_probability()
+
+    def set_h2(self, new_h2):
+        """
+        Set the total heritability (proportion of additive variance due to SNPs) for the trait
+        """
+        self.h2 = new_h2
+        self.set_per_snp_heritability()
+
+    def set_per_snp_mixture_probability(self):
+        """
+        Set the per-SNP mixing proportions for each variant in the dataset.
+        This is a convenience method that may come in handy for more flexible generative models.
+        """
+
+        self.per_snp_pi = {}
+
+        for c, c_size in self.shapes.items():
+            self.per_snp_pi[c] = np.repeat(np.array([self.pi]), c_size, axis=0)
+
+    def set_per_snp_heritability(self):
+        """
+        Set the per-SNP heritability (effect size variance) for each variant in the dataset.
+        This is a convenience method that may come in handy for more flexible generative models.
+        """
+
+        assert self.mixture_assignment is not None
+
+        # Estimate the global sigma_beta_sq based on the
+        # pre-specified heritability, the mixture proportions `pi`,
+        # and the prior multipliers `d`.
+
+        combined_assignments = np.concatenate([self.mixture_assignment[c] for c in self.chromosomes])
+
+        sigma_beta_sq = self.h2 / (combined_assignments*self.d).sum()
+
+        self.per_snp_h2 = {}
+
+        for c, c_size in self.shapes.items():
+            self.per_snp_h2[c] = sigma_beta_sq*self.d[np.where(self.mixture_assignment[c])[1]]
+
+    def get_causal_status(self):
+        """
+        :return: A dictionary where the keys are the chromosome numbers
+        and the values are binary vectors indicating which SNPs are
+        causal for the simulated phenotype.
+
+        :raises AssertionError: If the mixture assignment is not set.
+        """
+
+        assert self.mixture_assignment is not None
+
+        try:
+            zero_index = list(self.d).index(0)
+        except ValueError:
+            # If all SNPs are causal:
+            return {c: np.repeat(True, c_size) for c, c_size in self.shapes.items()}
+
+        causal_status = {}
+
+        for c, mix_a in self.mixture_assignment.items():
+            causal_status[c] = np.where(mix_a)[1] != zero_index
+
+        return causal_status
+
+    def set_causal_snps(self, causal_snps):
+        """
+        A utility method to set the causal variants in the simulation based on an array or
+        list of SNPs specified by the user. The method takes an iterable (e.g. list or array) of `causal_snps`
+        and then creates a new mixture assignment object where only the `causal_snps`
+        contribute to the phenotype.
+
+        :param causal_snps: A list or array of SNP rsIDs.
+        :raises ValueError: If all mixture components are causal.
+
+        """
+
+        # Get the index of the mixture component whose multiplier is zero (i.e. the null component):
+        try:
+            zero_index = list(self.d).index(0)
+        except ValueError:
+            raise ValueError("Cannot set causal variants when all mixture components are causal. Modify "
+                             "the mixture multipliers `d` to enable this functionality.")
+
+        # Get the indices of the non-null mixture components:
+        nonzero_indices = [i for i, d in enumerate(self.d) if d != 0.]
+
+        # Get the mixture proportions for the non-null components and normalize them so they sum to 1:
+        pis = np.array(self.pi)[nonzero_indices]
+        pis /= pis.sum()
+
+        # Initialize new mixture assignment object:
+        new_assignment = {c: np.zeros((s, self.n_components)) for c, s in self.shapes.items()}
+
+        from ..utils.compute_utils import intersect_arrays
+
+        n_causal_set = 0
+
+        for c, snps in self.snps.items():
+
+            # Intersect the list of causal SNPs with the SNPs on chromosome `c`:
+            snp_idx = intersect_arrays(snps, causal_snps, return_index=True)
+
+            if len(snp_idx) > 0:
+                n_causal_set += len(snp_idx)
+                # For the causal SNPs, assign them randomly to the causal components
+                new_assignment[c][snp_idx, np.random.choice(nonzero_indices,
+                                                            size=len(snp_idx),
+                                                            p=pis)] = 1
+                # For the remaining SNPs, assign them to the null component:
+                new_assignment[c][:, zero_index] = np.abs(new_assignment[c].sum(axis=1) - 1)
+
+        if n_causal_set < len(causal_snps):
+            warnings.warn("Not all causal SNPs are represented in the genotype matrix! "
+                          f"User passed a list of {len(causal_snps)} SNPs, only matched {n_causal_set}.")
+
+        self.set_mixture_assignment(new_assignment)
+
+    def set_mixture_assignment(self, new_assignment):
+        """
+        Set the mixture assignments according to user-provided dictionary. The mixture
+        assignment indicates which mixture component the effect size of a particular
+        variant comes from.
+        :param new_assignment: A dictionary where the keys are the chromosomes and
+        the values are the mixture assignment for each SNP on that chromosome.
+        """
+
+        # Check that the shapes match pre-specified information:
+        for c, c_size in self.shapes.items():
+            assert new_assignment[c].shape == (c_size, self.n_components)
+
+        self.mixture_assignment = new_assignment
+
+    def simulate_mixture_assignment(self):
+        """
+        Simulate assigning SNPs to the various mixture components
+        with probabilities given by mixing proportions `pi`.
+        """
+
+        if self.per_snp_pi is None or len(self.per_snp_pi) < 1:
+            self.set_per_snp_mixture_probability()
+
+        self.mixture_assignment = {}
+
+        from ..utils.model_utils import multinomial_rvs
+
+        for c, c_size in self.shapes.items():
+
+            self.mixture_assignment[c] = multinomial_rvs(1, self.per_snp_pi[c])
+
+        return self.mixture_assignment
+
+    def set_beta(self, new_beta):
+        """
+        Set the variant effect sizes (beta) according to user-provided dictionary.
+
+        :param new_beta: A dictionary where the keys are the chromosomes and
+        the values are the beta (effect size) for each SNP on that chromosome.
+        """
+
+        # Check that the shapes match pre-specified information:
+        for c, c_size in self.shapes.items():
+            assert len(new_beta[c]) == c_size
+
+        self.beta = new_beta
+
+    def simulate_beta(self):
+        """
+        Simulate the causal effect size for variants included
+        in the dataset. Here, the variant effect size is drawn from
+        a Gaussian density with mean zero and scale given by
+    the square root of the per-SNP heritability.
+        """
+
+        if self.per_snp_h2 is None or len(self.per_snp_h2) < 1:
+            self.set_per_snp_heritability()
+
+        self.beta = {}
+
+        for c, c_size in self.shapes.items():
+
+            self.beta[c] = np.random.normal(loc=0.0,
+                                            scale=np.sqrt(self.per_snp_h2[c]),
+                                            size=c_size)
+
+        return self.beta
+
+    def simulate_phenotype(self):
+        """
+        Simulate complex phenotypes for the samples present in the genotype matrix, given their
+    genotype information and the fixed effect sizes `beta` that were simulated in previous steps.
+
+        Given the simulated effect sizes, the phenotype is generated as follows:
+
+        `Y = XB + e`
+
+        Where `Y` is the vector of phenotypes, `X` is the genotype matrix, `B` is the vector of effect sizes,
+        and `e` represents the residual effects.
+        """
+
+        assert self.beta is not None
+
+        # Compute the polygenic score given the simulated/provided beta:
+        pgs = self.score(self.beta)
+
+        # Sample the environmental/residual component:
+        e = np.random.normal(loc=0., scale=np.sqrt(1. - self.h2), size=self.sample_size)
+
+        # The final simulated phenotype is a combination of
+        # the polygenic score + the residual component:
+        y = pgs + e
+
+        if self.phenotype_likelihood == 'binomial':
+            # If the simulated phenotype is to be binary,
+            # use the threshold model to determine positives/negatives
+            # based on the prevalence of the phenotype in the population:
+
+            from ..stats.transforms.phenotype import standardize
+
+            y = standardize(y)
+
+            from scipy.stats import norm
+
+            cutoff = norm.ppf(1. - self.prevalence)
+            new_y = np.zeros_like(y, dtype=int)
+            new_y[y > cutoff] = 1
+        else:
+            new_y = y
+
+        self.set_phenotype(new_y)
+
+        return new_y
+
+    def simulate(self,
+                 reset_beta=True,
+                 reset_mixture_assignment=True,
+                 perform_gwas=False):
+        """
+        A convenience method to simulate all the components of the generative model.
+        Specifically, the simulation follows the standard linear model, where the phenotype
+        depends on genotype and environmental components that are assumed to be uncorrelated:
+
+        `Y = XB + e`
+
+        Where `Y` is the vector of phenotypes, `X` is the genotype matrix, `B` is the vector of effect sizes, 
+        and `e` represents the residual effects. The generative model proceeds by:
+
+         1) Drawing the effect sizes `beta` from a Gaussian mixture density.
+         2) Drawing the residual effect from an isotropic Gaussian density.
+         3) Setting the phenotype according to the equation above. 
+
+        :param reset_beta: If True, reset the effect sizes by drawing new ones from the prior density.
+        :param reset_mixture_assignment: If True, reset the assignment of SNPs to mixture components. Set to False
+        if you'd like to keep the same configuration of causal SNPs.
+        :param perform_gwas: If True, automatically perform genome-wide association on the newly simulated phenotype.
+        """
+
+        # Simulate the mixture assignment:
+        if self.mixture_assignment is None or reset_mixture_assignment:
+            self.simulate_mixture_assignment()
+
+        # Set the per-SNP heritability based on the mixture assignment:
+        self.set_per_snp_heritability()
+
+        # Simulate betas based on per-SNP heritability
+        if self.beta is None or reset_beta:
+            self.simulate_beta()
+
+        # Simulate the phenotype
+        self.simulate_phenotype()
+
+        if perform_gwas:
+            # Perform genome-wide association testing...
+            self.perform_gwas()
+
+    def to_true_beta_table(self, per_chromosome=False):
+        """
+        Export the simulated true effect sizes and causal status into a pandas dataframe.
+        :param per_chromosome: If True, return a dictionary of tables for each chromosome separately.
+
+        :return: A pandas DataFrame with the true effect sizes and causal status for each variant.
+        """
+
+        assert self.beta is not None
+
+        eff_tables = {}
+        causal_status = self.get_causal_status()
+
+        for c in self.chromosomes:
+
+            eff_tables[c] = pd.DataFrame({
+                'CHR': c,
+                'SNP': self.genotype[c].snps,
+                'A1': self.genotype[c].a1,
+                'A2': self.genotype[c].a2,
+                'MixtureComponent': np.where(self.mixture_assignment[c] == 1)[1],
+                'Heritability': self.per_snp_h2[c],
+                'BETA': self.beta[c].flatten(),
+                'Causal': causal_status[c],
+            })
+
+        if per_chromosome:
+            return eff_tables
+        else:
+            return pd.concat(list(eff_tables.values()))
+
+
+ n_components (property)

+ Returns: The number of Gaussian mixture components for the effect size distribution.

+ __init__(bed_files, h2=0.2, pi=0.1, d=(0.0, 1.0), prevalence=0.15, **kwargs)

+ Initialize the PhenotypeSimulator object with the necessary parameters.

+ Parameters:
+ | Name | Description | Default |
+ | --- | --- | --- |
+ | bed_files | A path (or list of paths) to PLINK BED files containing the genotype information. | required |
+ | h2 | The trait SNP heritability, or proportion of variance explained by SNPs. | 0.2 |
+ | pi | The mixing proportions for the mixture of Gaussians (our model for the distribution of effect sizes). If a float is provided, it is converted to a tuple (1-pi, pi), where pi is the proportion of causal variants. | 0.1 |
+ | d | The variance multipliers for each component of the Gaussian mixture density. By default, all components have the same variance multiplier. | (0.0, 1.0) |
+ | prevalence | The (disease) prevalence for binary (case-control) phenotypes. | 0.15 |
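
+ A minimal usage sketch (the BED file path is hypothetical; `simulate` and `to_true_beta_table` are documented below):

+    from magenpy.simulation.PhenotypeSimulator import PhenotypeSimulator
+
+    # Simulate a trait with h2 = 0.4 where roughly 1% of variants are causal:
+    sim = PhenotypeSimulator("data/chr22.bed", h2=0.4, pi=0.01)  # hypothetical path
+    sim.simulate()
+
+    # Export the true (simulated) effect sizes and causal status:
+    true_beta = sim.to_true_beta_table()
+    print(true_beta.head())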

+ get_causal_status()

+ Returns: A dictionary where the keys are the chromosome numbers and the values are binary vectors indicating which SNPs are causal for the simulated phenotype.

+ Raises: AssertionError if the mixture assignment is not set.
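
+ For example, assuming `sim` is a PhenotypeSimulator on which `simulate()` has already been called, the returned dictionary can be aggregated as follows:

+    causal = sim.get_causal_status()
+    # e.g. {22: array([False, True, False, ...])} -- one boolean vector per chromosome
+    n_causal = sum(int(v.sum()) for v in causal.values())
+    print(f"Number of causal variants: {n_causal}")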

+ set_beta(new_beta)

+ Set the variant effect sizes (beta) according to a user-provided dictionary.

+ Parameters:
+ | Name | Description | Default |
+ | --- | --- | --- |
+ | new_beta | A dictionary where the keys are the chromosomes and the values are the beta (effect size) for each SNP on that chromosome. | required |

+ set_causal_snps(causal_snps)

+ A utility method to set the causal variants in the simulation based on an array or list of SNPs specified by the user. The method takes an iterable (e.g. list or array) of causal_snps and then creates a new mixture assignment object where only the causal_snps contribute to the phenotype.

+ Parameters:
+ | Name | Description | Default |
+ | --- | --- | --- |
+ | causal_snps | A list or array of SNP rsIDs. | required |

+ Raises: ValueError if all mixture components are causal.
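
+ For instance, to restrict the causal set to a handful of variants (the rsIDs below are hypothetical placeholders):

+    # Only these SNPs will contribute to the simulated phenotype. Keep this
+    # configuration fixed when re-simulating by disabling the mixture reset:
+    sim.set_causal_snps(['rs100', 'rs200', 'rs300'])  # hypothetical rsIDs
+    sim.simulate(reset_mixture_assignment=False)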

+ set_h2(new_h2)

+ Set the total heritability (proportion of additive variance due to SNPs) for the trait.

+ set_mixture_assignment(new_assignment)

+ Set the mixture assignments according to a user-provided dictionary. The mixture assignment indicates which mixture component the effect size of a particular variant comes from.

+ Parameters:
+ | Name | Description | Default |
+ | --- | --- | --- |
+ | new_assignment | A dictionary where the keys are the chromosomes and the values are the mixture assignment for each SNP on that chromosome. | required |

+ set_per_snp_heritability()

+ Set the per-SNP heritability (effect size variance) for each variant in the dataset. This is a convenience method that may come in handy for more flexible generative models.

+ set_per_snp_mixture_probability()

+ Set the per-SNP mixing proportions for each variant in the dataset. This is a convenience method that may come in handy for more flexible generative models.

+ set_pi(new_pi)

+ Set the mixture proportions (proportion of variants in each Gaussian mixture component).

+ simulate(reset_beta=True, reset_mixture_assignment=True, perform_gwas=False)

+ A convenience method to simulate all the components of the generative model. Specifically, the simulation follows the standard linear model, where the phenotype depends on genotype and environmental components that are assumed to be uncorrelated:

+ Y = XB + e

+ Where Y is the vector of phenotypes, X is the genotype matrix, B is the vector of effect sizes, and e represents the residual effects. The generative model proceeds by:

+ 1) Drawing the effect sizes beta from a Gaussian mixture density.
+ 2) Drawing the residual effect from an isotropic Gaussian density.
+ 3) Setting the phenotype according to the equation above.

+ Parameters:
+ | Name | Description | Default |
+ | --- | --- | --- |
+ | reset_beta | If True, reset the effect sizes by drawing new ones from the prior density. | True |
+ | reset_mixture_assignment | If True, reset the assignment of SNPs to mixture components. Set to False if you'd like to keep the same configuration of causal SNPs. | True |
+ | perform_gwas | If True, automatically perform genome-wide association on the newly simulated phenotype. | False |
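
+ One common pattern, sketched below, is to draw the genetic architecture once and then generate several phenotype replicates from it:

+    # First call draws mixture assignments and effect sizes:
+    sim.simulate()
+
+    # Later replicates keep the same causal SNPs and effect sizes,
+    # redrawing only the residual component of the phenotype:
+    for replicate in range(10):
+        sim.simulate(reset_beta=False, reset_mixture_assignment=False)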

+ simulate_beta()

+ Simulate the causal effect sizes for variants included in the dataset. Here, each variant's effect size is drawn from a Gaussian density with mean zero and scale given by the square root of its per-SNP heritability.
+ + + +

+ simulate_mixture_assignment() + +

+ + +
+ +

Simulate assigning SNPs to the various mixture components +with probabilities given by mixing proportions pi.


+ simulate_phenotype()

+ Simulate complex phenotypes for the samples present in the genotype matrix, given their genotype information and the fixed effect sizes beta that were simulated in previous steps.

+ Given the simulated effect sizes, the phenotype is generated as follows:

+ Y = XB + e

+ Where Y is the vector of phenotypes, X is the genotype matrix, B is the vector of effect sizes, and e represents the residual effects.
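
+ For binary traits, the thresholding step corresponds to the standard liability threshold model. A short sketch of the cutoff computation, mirroring the logic in simulate_phenotype:

+    from scipy.stats import norm
+
+    prevalence = 0.15
+    # Individuals whose standardized liability exceeds this cutoff are labeled cases:
+    cutoff = norm.ppf(1. - prevalence)  # ~1.04 for prevalence = 0.15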

+ to_true_beta_table(per_chromosome=False)

+ Export the simulated true effect sizes and causal status into a pandas dataframe.

+ Parameters:
+ | Name | Description | Default |
+ | --- | --- | --- |
+ | per_chromosome | If True, return a dictionary of tables for each chromosome separately. | False |

+ Returns: A pandas DataFrame with the true effect sizes and causal status for each variant.
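
+ A brief sketch of how the exported table might be used, assuming a simulator `sim` that has already been run (the column names follow the class source above):

+    beta_df = sim.to_true_beta_table()
+    # Columns: CHR, SNP, A1, A2, MixtureComponent, Heritability, BETA, Causal
+    print(beta_df.loc[beta_df['Causal']].head())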
diff --git a/api/stats/gwa/utils/index.html b/api/stats/gwa/utils/index.html
new file mode 100644
index 0000000..32d0a30
--- /dev/null
+++ b/api/stats/gwa/utils/index.html
@@ -0,0 +1,1742 @@

Utils


+ inflation_factor(sumstats_input)

+ Compute the genomic control (GC) inflation factor (also known as lambda) from GWAS summary statistics.

+ The inflation factor can be used to detect and correct inflation in the test statistics.

+ Parameters:
+ | Name | Type | Description | Default |
+ | --- | --- | --- | --- |
+ | sumstats_input | Union[GWADataLoader, SumstatsTable, np.array] | The input can be one of three classes of objects: a GWADataLoader object, a SumstatsTable object, or a numpy array of chi-squared statistics from which to compute the inflation factor. | required |

+ Returns: The inflation factor (lambda) computed from the chi-squared statistics.
+ Source code in magenpy/stats/gwa/utils.py
def inflation_factor(sumstats_input: Union[GWADataLoader, SumstatsTable, np.array]):
+    """
+    Compute the genomic control (GC) inflation factor (also known as lambda)
+    from GWAS summary statistics.
+
+    The inflation factor can be used to detect and correct inflation in the test statistics.
+
+    :param sumstats_input: The input can be one of three classes of objects: A GWADataLoader object,
+    a SumstatsTable object, or a numpy array of chi-squared statistics to compute the inflation factor.
+
+    :return: The inflation factor (lambda) computed from the chi-squared statistics.
+    """
+
+    if isinstance(sumstats_input, GWADataLoader):
+        chisq = np.concatenate([ss.get_chisq_statistic() for ss in sumstats_input.sumstats_table.values()])
+    elif isinstance(sumstats_input, SumstatsTable):
+        chisq = sumstats_input.get_chisq_statistic()
+    else:
+        chisq = sumstats_input
+
+    from scipy.stats import chi2
+
+    return np.median(chisq) / chi2.median(1)
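
+ As a quick sanity check, a minimal sketch computing lambda on chi-squared statistics simulated under the null, where it should be close to 1:

+    import numpy as np
+    from magenpy.stats.gwa.utils import inflation_factor
+
+    chisq = np.random.chisquare(df=1, size=100_000)
+    print(inflation_factor(chisq))  # ~1.0; values well above 1 suggest inflation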

+ perform_gwa_plink1p9(genotype_matrix, temp_dir='temp', **phenotype_transform_kwargs)

+ Perform genome-wide association testing using plink 1.9. This function takes a GenotypeMatrix object and GWAS-related flags and calls plink to perform GWA on the genotype and phenotype data referenced by the GenotypeMatrix object.

+ Parameters:
+ | Name | Description | Default |
+ | --- | --- | --- |
+ | genotype_matrix | A plinkBEDGenotypeMatrix object. | required |
+ | temp_dir | Path to a directory where we keep intermediate temporary files from plink. | 'temp' |
+ | phenotype_transform_kwargs | Keyword arguments to pass to the chained_transform function. These include the following options to transform the phenotype before performing GWAS: adjust_covariates, standardize_phenotype, rint_phenotype, and outlier_sigma_threshold. NOTE: these transformations are only applied to continuous phenotypes (likelihood='gaussian'). | {} |

+ Returns: A SumstatsTable object containing the summary statistics from the association tests.
+ Source code in magenpy/stats/gwa/utils.py
def perform_gwa_plink1p9(genotype_matrix,
+                         temp_dir='temp',
+                         **phenotype_transform_kwargs):
+    """
+    Perform genome-wide association testing using plink 1.9.
+    This function takes a GenotypeMatrix object and gwas-related flags and
+    calls plink to perform GWA on the genotype and phenotype data referenced
+    by the GenotypeMatrix object.
+
+    :param genotype_matrix: A plinkBEDGenotypeMatrix object.
+    :param temp_dir: Path to a directory where we keep intermediate temporary files from plink.
+    :param phenotype_transform_kwargs: Keyword arguments to pass to the `chained_transform` function. These arguments
+    include the following options to transform the phenotype before performing GWAS:
+    `adjust_covariates`, `standardize_phenotype`, `rint_phenotype`, and `outlier_sigma_threshold`. NOTE: These
+    transformations are only applied to continuous phenotypes (`likelihood='gaussian'`).
+
+    :return: A SumstatsTable object containing the summary statistics from the association tests.
+    """
+
+    from ...GenotypeMatrix import plinkBEDGenotypeMatrix
+    from ...utils.executors import plink1Executor
+
+    assert isinstance(genotype_matrix, plinkBEDGenotypeMatrix)
+
+    plink1 = plink1Executor()
+
+    s_table = genotype_matrix.sample_table
+
+    if s_table.phenotype_likelihood is None:
+        warnings.warn("The phenotype likelihood is not specified! "
+                      "Assuming that the phenotype is continuous...")
+
+    # Transform the phenotype:
+    phenotype, mask = chained_transform(s_table, **phenotype_transform_kwargs)
+
+    # Prepare the phenotype table to pass to plink:
+    phe_table = s_table.get_phenotype_table()
+
+    # If the likelihood is binomial, transform the phenotype into
+    # plink's coding for case/control (1/2) rather than (0/1).
+    if s_table.phenotype_likelihood == 'binomial':
+        phe_table['phenotype'] += 1
+    else:
+        phe_table = phe_table.loc[mask, :]
+        phe_table['phenotype'] = phenotype
+
+    # Output phenotype table:
+    phe_fname = osp.join(temp_dir, "pheno.txt")
+    phe_table.to_csv(phe_fname, sep="\t", index=False, header=False)
+
+    # Process covariates:
+    if s_table.phenotype_likelihood == 'binomial' and 'adjust_covariates' in phenotype_transform_kwargs and \
+            phenotype_transform_kwargs['adjust_covariates']:
+
+        covar_fname = osp.join(temp_dir, "covar.txt")
+        covar = s_table.get_covariates_table().loc[mask, :]
+        covar.to_csv(covar_fname, sep="\t", index=False, header=False)
+    else:
+        covar_fname = None
+
+    # Determine regression type based on phenotype likelihood:
+    plink_reg_type = ['linear', 'logistic'][s_table.phenotype_likelihood == 'binomial']
+
+    # Output subset of SNPs to perform association tests on:
+    snp_keepfile = osp.join(temp_dir, "variants.keep")
+    pd.DataFrame({'SNP': genotype_matrix.snps}).to_csv(
+        snp_keepfile, index=False, header=False
+    )
+
+    # Output file:
+    plink_output = osp.join(temp_dir, "output")
+
+    cmd = [
+        f"--bfile {genotype_matrix.bed_file}",
+        f"--extract {snp_keepfile}",
+        f"--{plink_reg_type} hide-covar",
+        f"--pheno {phe_fname}",
+        f"--out {plink_output}"
+    ]
+
+    if covar_fname is not None:
+        cmd.append(f'--covar {covar_fname}')
+
+    plink1.execute(cmd)
+
+    output_fname = plink_output + f".PHENO1.assoc.{plink_reg_type}"
+
+    if not osp.isfile(output_fname):
+        if plink_reg_type == 'logistic' and osp.isfile(output_fname + ".hybrid"):
+            output_fname += ".hybrid"
+        else:
+            raise FileNotFoundError(f"Could not find plink output file: {output_fname}")
+
+    # Read the summary statistics file from plink:
+    ss_table = SumstatsTable.from_file(output_fname, sumstats_format='plink1.9')
+    # Infer the reference allele:
+    ss_table.infer_a2(genotype_matrix.snp_table)
+
+    # Make sure that the effect allele is encoded properly:
+    ss_table.match(genotype_matrix.snp_table, correct_flips=True)
+
+    return ss_table

+ perform_gwa_plink2(genotype_matrix, temp_dir='temp', **phenotype_transform_kwargs)

+ Perform genome-wide association testing using plink 2.0. This function takes a GenotypeMatrix object and GWAS-related flags and calls plink to perform GWA on the genotype and phenotype data referenced by the GenotypeMatrix object.

+ Parameters:
+ | Name | Description | Default |
+ | --- | --- | --- |
+ | genotype_matrix | A plinkBEDGenotypeMatrix object. | required |
+ | temp_dir | Path to a directory where we keep intermediate temporary files from plink. | 'temp' |
+ | phenotype_transform_kwargs | Keyword arguments to pass to the chained_transform function. These include the following options to transform the phenotype before performing GWAS: adjust_covariates, standardize_phenotype, rint_phenotype, and outlier_sigma_threshold. NOTE: these transformations are only applied to continuous phenotypes (likelihood='gaussian'). | {} |

+ Returns: A SumstatsTable object containing the summary statistics from the association tests.
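
+ A usage sketch, assuming `g_mat` is an existing plinkBEDGenotypeMatrix whose sample table carries a continuous phenotype, and that plink2 is installed and discoverable by magenpy's plink2Executor (the keyword argument shown is one of the chained_transform options listed above):

+    from magenpy.stats.gwa.utils import perform_gwa_plink2
+
+    ss_table = perform_gwa_plink2(
+        g_mat,                      # hypothetical plinkBEDGenotypeMatrix instance
+        temp_dir='temp',
+        standardize_phenotype=True  # forwarded to chained_transform
+    )
+    print(ss_table.get_chisq_statistic()[:5])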
+ Source code in magenpy/stats/gwa/utils.py
def perform_gwa_plink2(genotype_matrix,
+                       temp_dir='temp',
+                       **phenotype_transform_kwargs):
+    """
+
+    Perform genome-wide association testing using plink 2.0.
+    This function takes a GenotypeMatrix object and gwas-related flags and
+    calls plink to perform GWA on the genotype and phenotype data referenced
+    by the GenotypeMatrix object.
+
+    :param genotype_matrix: A plinkBEDGenotypeMatrix object.
+    :param temp_dir: Path to a directory where we keep intermediate temporary files from plink.
+    :param phenotype_transform_kwargs: Keyword arguments to pass to the `chained_transform` function. These arguments
+    include the following options to transform the phenotype before performing GWAS:
+    `adjust_covariates`, `standardize_phenotype`, `rint_phenotype`, and `outlier_sigma_threshold`. NOTE: These
+    transformations are only applied to continuous phenotypes (`likelihood='gaussian'`).
+
+    :return: A SumstatsTable object containing the summary statistics from the association tests.
+    """
+
+    from ...GenotypeMatrix import plinkBEDGenotypeMatrix
+    from ...utils.executors import plink2Executor
+
+    assert isinstance(genotype_matrix, plinkBEDGenotypeMatrix)
+
+    plink2 = plink2Executor()
+
+    s_table = genotype_matrix.sample_table
+
+    if s_table.phenotype_likelihood is None:
+        warnings.warn("The phenotype likelihood is not specified! "
+                      "Assuming that the phenotype is continuous...")
+
+    # Transform the phenotype:
+    phenotype, mask = chained_transform(s_table, **phenotype_transform_kwargs)
+
+    # Prepare the phenotype table to pass to plink:
+    phe_table = s_table.get_phenotype_table()
+
+    # If the likelihood is binomial, transform the phenotype into
+    # plink's coding for case/control (1/2) rather than (0/1).
+    if s_table.phenotype_likelihood == 'binomial':
+        phe_table['phenotype'] += 1
+    else:
+        phe_table = phe_table.loc[mask, :]
+        phe_table['phenotype'] = phenotype
+
+    # Output phenotype table:
+    phe_fname = osp.join(temp_dir, "pheno.txt")
+    phe_table.to_csv(phe_fname, sep="\t", index=False, header=False)
+
+    # Process covariates:
+    if s_table.phenotype_likelihood == 'binomial' and 'adjust_covariates' in phenotype_transform_kwargs and \
+            phenotype_transform_kwargs['adjust_covariates']:
+
+        covar_fname = osp.join(temp_dir, "covar.txt")
+        covar = s_table.get_covariates_table().loc[mask, :]
+        covar.to_csv(covar_fname, sep="\t", index=False, header=False)
+        covar_modifier = ''
+    else:
+        covar_fname = None
+        covar_modifier = ' allow-no-covars'
+
+    # Determine regression type based on phenotype likelihood:
+    plink_reg_type = ['linear', 'logistic'][s_table.phenotype_likelihood == 'binomial']
+
+    # Output subset of SNPs to perform association tests on:
+    snp_keepfile = osp.join(temp_dir, "variants.keep")
+    pd.DataFrame({'SNP': genotype_matrix.snps}).to_csv(
+        snp_keepfile, index=False, header=False
+    )
+
+    # Output file:
+    plink_output = osp.join(temp_dir, "output")
+
+    cmd = [
+        f"--bfile {genotype_matrix.bed_file}",
+        f"--extract {snp_keepfile}",
+        f"--{plink_reg_type} hide-covar{covar_modifier} cols=chrom,pos,alt1,ref,a1freq,nobs,beta,se,tz,p",
+        f"--pheno {phe_fname}",
+        f"--out {plink_output}"
+    ]
+
+    if covar_fname is not None:
+        cmd.append(f'--covar {covar_fname}')
+
+    plink2.execute(cmd)
+
+    output_fname = plink_output + f".PHENO1.glm.{plink_reg_type}"
+
+    if not osp.isfile(output_fname):
+        if plink_reg_type == 'logistic' and osp.isfile(output_fname + ".hybrid"):
+            output_fname += ".hybrid"
+        else:
+            raise FileNotFoundError(f"plink did not produce the expected output file: {output_fname}")
+
+    # Read the summary statistics file from plink:
+    ss_table = SumstatsTable.from_file(output_fname, sumstats_format='plink2')
+    # Make sure that the effect allele is encoded properly:
+    ss_table.match(genotype_matrix.snp_table, correct_flips=True)
+
+    return ss_table
+
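A brief usage sketch of `perform_gwa_plink2`. The genotype object `g_mat` is an assumption (an already-constructed `plinkBEDGenotypeMatrix` with a phenotype attached), and the transform flag shown is just one of the `chained_transform` options listed above:

```python
# Usage sketch: `g_mat` is assumed to be a plinkBEDGenotypeMatrix whose
# sample table already has a phenotype (and its likelihood) attached.
from magenpy.stats.gwa.utils import perform_gwa_plink2

ss = perform_gwa_plink2(
    g_mat,                       # plinkBEDGenotypeMatrix with phenotype set
    temp_dir='temp',             # where plink's intermediate files are written
    standardize_phenotype=True,  # forwarded to chained_transform
)
print(ss.pval[:5])  # p-values derived from the BETA/SE columns
```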
perform_gwa_xarray(genotype_matrix, standardize_genotype=False, **phenotype_transform_kwargs)
Perform genome-wide association testing using xarray and the PyData ecosystem. This function takes a GenotypeMatrix object and GWAS-related flags and performs (simple) GWA on the genotype and phenotype data referenced by the GenotypeMatrix object. This function only implements GWA testing for continuous phenotypes. For other functionality (e.g. case-control GWAS), please use plink as a backend or consult other GWAS software (e.g. GCTA or REGENIE).

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `genotype_matrix` | | A `GenotypeMatrix` object. | required |
| `standardize_genotype` | | If True, the genotype matrix will be standardized such that the columns (i.e. SNPs) have zero mean and unit variance. | `False` |
| `phenotype_transform_kwargs` | | Keyword arguments to pass to the `chained_transform` function. These arguments include the following options to transform the phenotype before performing GWAS: `adjust_covariates`, `standardize_phenotype`, `rint_phenotype`, and `outlier_sigma_threshold`. NOTE: these transformations are only applied to continuous phenotypes (`likelihood='gaussian'`). | `{}` |

Returns:

| Type | Description |
| --- | --- |
| | A `SumstatsTable` object containing the summary statistics from the association tests. |
Source code in magenpy/stats/gwa/utils.py
def perform_gwa_xarray(genotype_matrix,
+                       standardize_genotype=False,
+                       **phenotype_transform_kwargs):
+    """
+    Perform genome-wide association testing using xarray and the PyData ecosystem.
+    This function takes a GenotypeMatrix object and gwas-related flags and
+    performs (simple) GWA on the genotype and phenotype data referenced
+    by the GenotypeMatrix object. This function only implements GWA testing for
+    continuous phenotypes. For other functionality (e.g. case-control GWAS),
+    please use `plink` as a backend or consult other GWAS software (e.g. GCTA or REGENIE).
+
+    :param genotype_matrix: A GenotypeMatrix object.
+    :param standardize_genotype: If True, the genotype matrix will be standardized such that the columns (i.e. SNPs)
+    have zero mean and unit variance.
+    :param phenotype_transform_kwargs: Keyword arguments to pass to the `chained_transform` function. These arguments
+    include the following options to transform the phenotype before performing GWAS:
+    `adjust_covariates`, `standardize_phenotype`, `rint_phenotype`, and `outlier_sigma_threshold`. NOTE: These
+    transformations are only applied to continuous phenotypes (`likelihood='gaussian'`).
+
+    :return: A SumstatsTable object containing the summary statistics from the association tests.
+    """
+
+    # Sanity checks:
+
+    # Check that the genotype matrix is an xarrayGenotypeMatrix object.
+    from ...GenotypeMatrix import xarrayGenotypeMatrix
+    assert isinstance(genotype_matrix, xarrayGenotypeMatrix)
+
+    # Check that the phenotype likelihood is set correctly and that the phenotype is continuous.
+    if genotype_matrix.sample_table.phenotype_likelihood is None:
+        warnings.warn("The phenotype likelihood is not specified! "
+                      "Assuming that the phenotype is continuous...")
+    elif genotype_matrix.sample_table.phenotype_likelihood == 'binomial':
+        raise ValueError("The xarray backend currently does not support performing association "
+                         "testing on binary (case-control) phenotypes! Try setting the backend to `plink` or "
+                         "use external software (e.g. GCTA or REGENIE) for performing GWAS.")
+
+    # -----------------------------------------------------------
+
+    # Get the SNP table from the genotype_matrix object:
+    sumstats_table = genotype_matrix.get_snp_table(
+        ['CHR', 'SNP', 'POS', 'A1', 'A2', 'N', 'MAF']
+    )
+
+    # -----------------------------------------------------------
+
+    # Transform the phenotype:
+    phenotype, mask = chained_transform(genotype_matrix.sample_table, **phenotype_transform_kwargs)
+
+    # TODO: Figure out how to adjust the per-variant sample size based on the mask!
+
+    # Estimate the phenotypic variance:
+    sigma_sq_y = np.var(phenotype)
+
+    # -----------------------------------------------------------
+    # Perform association testing using closed-form solutions:
+
+    # Apply the mask to the genotype matrix:
+    xr_mat = genotype_matrix.xr_mat[mask, :]
+
+    if standardize_genotype:
+
+        from ..transforms.genotype import standardize
+
+        sumstats_table['BETA'] = np.dot(standardize(xr_mat).T, phenotype) / sumstats_table['N'].values
+        sumstats_table['SE'] = np.sqrt(sigma_sq_y / sumstats_table['N'].values)
+    else:
+
+        # beta_hat = X^T y / (N * Var(X)); parenthesize the denominator so the
+        # MAF-implied genotype variance divides, rather than multiplies, the estimate
+        # (consistent with the SE formula on the next line):
+        sumstats_table['BETA'] = (
+            np.dot(xr_mat.fillna(sumstats_table['MAF'].values).T, phenotype) /
+            (sumstats_table['N'].values * genotype_matrix.maf_var)
+        )
+
+        sumstats_table['SE'] = np.sqrt(sigma_sq_y / (sumstats_table['N'].values * genotype_matrix.maf_var))
+
+    ss_table = SumstatsTable(sumstats_table)
+    # Trigger computing z-score and p-values from the BETA and SE columns:
+    _, _ = ss_table.z_score, ss_table.pval
+
+    return ss_table
+
+
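The closed-form solutions above are just per-SNP marginal OLS. A self-contained toy illustration of the same algebra (all names are local to this sketch, not part of magenpy):

```python
import numpy as np

rng = np.random.default_rng(0)
n, m = 1000, 5                          # samples, SNPs
X = rng.binomial(2, 0.3, size=(n, m)).astype(float)
y = 0.5 * X[:, 0] + rng.normal(size=n)  # SNP 0 carries a true effect
y -= y.mean()                           # center the phenotype

Xc = X - X.mean(axis=0)                 # center the genotypes
var_x = Xc.var(axis=0)

beta = Xc.T @ y / (n * var_x)           # marginal OLS effect per SNP
se = np.sqrt(y.var() / (n * var_x))     # standard error under the null
print(np.round(beta / se, 2))           # z-scores; SNP 0 stands out
```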
+
\ No newline at end of file
diff --git a/api/stats/h2/ldsc/index.html b/api/stats/h2/ldsc/index.html
new file mode 100644
index 0000000..d3892b5
--- /dev/null
+++ b/api/stats/h2/ldsc/index.html
@@ -0,0 +1,1138 @@

Ldsc


LDSCRegression

Bases: object


Perform LD Score Regression using the jackknife method.

Source code in magenpy/stats/h2/ldsc.py
class LDSCRegression(object):
+    """
+    Perform LD Score Regression using the jackknife method.
+    """
+
+    def __init__(self, gdl: GWADataLoader, n_blocks=200, max_chisq=None):
+        """
+        :param gdl: An instance of GWADataLoader
+        :param n_blocks: The number of blocks to use for the jackknife method.
+        :param max_chisq: The maximum Chi-Squared statistic to consider.
+        """
+
+        self.gdl = gdl
+        self.n_blocks = n_blocks
+
+        # ...
+
+    def fit(self):
+        """
+        Perform LD Score Regression estimation using the jackknife method.
+
+        :raises NotImplementedError: If method is not implemented.
+        """
+
+        raise NotImplementedError
+
+

__init__(gdl, n_blocks=200, max_chisq=None)


Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `gdl` | `GWADataLoader` | An instance of `GWADataLoader`. | required |
| `n_blocks` | | The number of blocks to use for the jackknife method. | `200` |
| `max_chisq` | | The maximum Chi-Squared statistic to consider. | `None` |
Source code in magenpy/stats/h2/ldsc.py
def __init__(self, gdl: GWADataLoader, n_blocks=200, max_chisq=None):
+    """
+    :param gdl: An instance of GWADataLoader
+    :param n_blocks: The number of blocks to use for the jackknife method.
+    :param max_chisq: The maximum Chi-Squared statistic to consider.
+    """
+
+    self.gdl = gdl
+    self.n_blocks = n_blocks
+
+
+

fit()


Perform LD Score Regression estimation using the jackknife method.


Raises:

| Type | Description |
| --- | --- |
| `NotImplementedError` | If method is not implemented. |
Source code in magenpy/stats/h2/ldsc.py
def fit(self):
+    """
+    Perform LD Score Regression estimation using the jackknife method.
+
+    :raises NotImplementedError: If method is not implemented.
+    """
+
+    raise NotImplementedError
+
+
+
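Since `fit` is not yet implemented, here is a rough sketch of what a block-jackknife LD Score Regression fit could look like. It illustrates the general technique only; the function name and the plain-OLS weighting are assumptions, not this class's eventual implementation:

```python
import numpy as np

def jackknife_ldsc(chi_sq, ld_score, n, n_blocks=200):
    """Block-jackknife estimate of h^2 (and its SE) from the LDSC slope (sketch)."""
    m = len(chi_sq)
    X = np.column_stack([np.ones(m), ld_score])   # intercept + LD score
    blocks = np.array_split(np.arange(m), n_blocks)

    # Full-data OLS fit of chi^2 on LD scores (real LDSC uses weighted LS):
    full = np.linalg.lstsq(X, chi_sq, rcond=None)[0]

    # Refit with each contiguous block of SNPs left out:
    loo = []
    for b in blocks:
        mask = np.ones(m, dtype=bool)
        mask[b] = False
        loo.append(np.linalg.lstsq(X[mask], chi_sq[mask], rcond=None)[0])
    loo = np.array(loo)

    # Jackknife pseudo-values for the slope (= N * h^2 / M):
    pseudo = n_blocks * full[1] - (n_blocks - 1) * loo[:, 1]
    slope, slope_se = pseudo.mean(), pseudo.std(ddof=1) / np.sqrt(n_blocks)
    return slope * m / n, slope_se * m / n   # convert the slope to the h^2 scale
```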

simple_ldsc(gdl)


Provides an estimate of SNP heritability from summary statistics using a simplified version of the LD Score Regression framework:

E[χ²_j] = h²·ℓ_j + intercept

where the response is the chi-squared statistic for SNP j and the predictor is its LD score ℓ_j.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `gdl` | `GWADataLoader` | An instance of `GWADataLoader` with the LD information and summary statistics initialized properly. | required |
Returns:

| Type | Description |
| --- | --- |
| | The estimated SNP heritability. |
Source code in magenpy/stats/h2/ldsc.py
def simple_ldsc(gdl: GWADataLoader):
+    """
+    Provides an estimate of SNP heritability from summary statistics using
+    a simplified version of the LD Score Regression framework.
+    E[X_j^2] = h^2*l_j + int
+    Where the response is the Chi-Squared statistic for SNP j
+    and the variable is its LD score.
+
+    :param gdl: An instance of `GWADataLoader` with the LD information and
+    summary statistics initialized properly.
+
+    :return: The estimated SNP heritability.
+    """
+
+    # Check data types:
+    assert gdl.ld is not None and gdl.sumstats_table is not None
+
+    ld_score = []
+    chi_sq = []
+    sample_size = []
+
+    for c in gdl.chromosomes:
+        ld_score.append(gdl.ld[c].ld_score)
+        chi_sq.append(gdl.sumstats_table[c].get_chisq_statistic())
+        sample_size.append(gdl.sumstats_table[c].n_per_snp.max())
+
+    ld_score = np.concatenate(ld_score)
+    chi_sq = np.concatenate(chi_sq)
+    sample_size = max(sample_size)
+
+    return (chi_sq.mean() - 1.) * len(ld_score) / (ld_score.mean() * sample_size)
+
+
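Averaging the LDSC model E[χ²_j] = 1 + N·h²·ℓ_j/M over SNPs and solving for h² gives exactly the return expression above: h² = (mean(χ²) − 1)·M / (mean(ℓ)·N). A toy numeric check of that inversion (all values synthetic):

```python
import numpy as np

rng = np.random.default_rng(42)
M, N, h2_true = 10_000, 50_000, 0.3

ld_score = rng.uniform(10., 200., size=M)   # synthetic LD scores
# Simulate chi-squared statistics around their LDSC expectation:
chi_sq = 1. + N * h2_true * ld_score / M + rng.normal(scale=0.5, size=M)

h2_hat = (chi_sq.mean() - 1.) * M / (ld_score.mean() * N)
print(round(h2_hat, 3))  # recovers ~0.3
```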
\ No newline at end of file
diff --git a/api/stats/ld/estimator/index.html b/api/stats/ld/estimator/index.html
new file mode 100644
index 0000000..ef9dc56
--- /dev/null
+++ b/api/stats/ld/estimator/index.html
@@ -0,0 +1,4125 @@

Estimator


BlockLD

Bases: SampleLD


A wrapper class to facilitate computing block-based Linkage-Disequilibrium (LD) matrices. Block-based LD matrices are a way to reduce the memory requirements of the LD matrix by computing the pairwise correlation coefficients only between SNPs that are within the same LD block.

LD blocks can be inferred by external software tools, such as LDetect of Berisa and Pickrell (2016):

Berisa T, Pickrell JK. Approximately independent linkage disequilibrium blocks in human populations. Bioinformatics. 2016 Jan 15;32(2):283-5. doi: 10.1093/bioinformatics/btv546. Epub 2015 Sep 22. PMID: 26395773; PMCID: PMC4731402.

The BlockLD estimator requires the LD blocks to be provided as input. The LD blocks are a Bx2 matrix, where B is the number of blocks and the columns are the start and end of each block, respectively.

See Also: WindowedLD, ShrinkageLD

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `genotype_matrix` | | The genotype matrix, an instance of `GenotypeMatrix`. |
| `ld_blocks` | | The LD blocks, a Bx2 matrix where B is the number of blocks and the columns are the start and end of each block, respectively. |
Source code in magenpy/stats/ld/estimator.py
class BlockLD(SampleLD):
+    """
+    A wrapper class to facilitate computing block-based Linkage-Disequilibrium (LD) matrices.
+    Block-based LD matrices are a way to reduce the memory requirements of the LD matrix by
+    computing the pairwise correlation coefficients only between SNPs that are within the same LD block.
+
+    LD blocks can be inferred by external software tools, such as `LDetect` of Berisa and Pickrell (2016):
+
+    Berisa T, Pickrell JK. Approximately independent linkage disequilibrium blocks in human populations.
+    Bioinformatics. 2016 Jan 15;32(2):283-5. doi: 10.1093/bioinformatics/btv546.
+    Epub 2015 Sep 22. PMID: 26395773; PMCID: PMC4731402.
+
+    The `BlockLD` estimator requires the LD blocks to be provided as input. The LD blocks are a Bx2 matrix
+    where B is the number of blocks and the columns are the start and end of each block, respectively.
+
+    !!! seealso "See Also"
+        * [WindowedLD][magenpy.stats.ld.estimator.WindowedLD]
+        * [ShrinkageLD][magenpy.stats.ld.estimator.ShrinkageLD]
+
+    :ivar genotype_matrix: The genotype matrix, an instance of `GenotypeMatrix`.
+    :ivar ld_blocks: The LD blocks, a Bx2 matrix where B is the number of blocks and the columns are
+    the start and end of each block, respectively.
+
+    """
+
+    def __init__(self,
+                 genotype_matrix,
+                 ld_blocks=None,
+                 ld_blocks_file=None):
+        """
+        Initialize the block-based LD estimator with a genotype matrix and LD blocks.
+
+        :param genotype_matrix: The genotype matrix, an instance of `GenotypeMatrix`.
+        :param ld_blocks: The LD blocks, a Bx2 matrix where B is the number of blocks and the
+        columns are the start and end of each block, respectively.
+        :param ld_blocks_file: The path to the LD blocks file
+        """
+
+        assert ld_blocks_file is not None or ld_blocks is not None
+
+        super().__init__(genotype_matrix=genotype_matrix)
+
+        if ld_blocks is None:
+            from ...parsers.misc_parsers import parse_ld_block_data
+            self.ld_blocks = parse_ld_block_data(ld_blocks_file)[self.genotype_matrix.chromosome]
+        else:
+            # Store user-provided LD blocks directly (otherwise the attribute is never set):
+            self.ld_blocks = ld_blocks
+
+    def compute_ld_boundaries(self):
+        """
+        Compute the per-SNP Linkage-Disequilibrium (LD) boundaries for the block-based estimator.
+
+        :return: A 2xM matrix of LD boundaries.
+        """
+
+        from .c_utils import find_ld_block_boundaries
+        return find_ld_block_boundaries(self.genotype_matrix.bp_pos, self.ld_blocks)
+
+    def compute(self,
+                output_dir,
+                temp_dir='temp',
+                overwrite=True,
+                delete_original=True,
+                dtype='int16',
+                compressor_name='lz4',
+                compression_level=5):
+        """
+
+        Compute the block-based LD matrix and store in Zarr array format.
+
+        :param output_dir: The path where to store the resulting LD matrix.
+        :param temp_dir: A temporary directory to store intermediate files and results.
+        :param overwrite: If True, overwrite any existing LD matrices in `temp_dir` and `output_dir`.
+        :param delete_original: If True, deletes dense or intermediate LD matrices generated along the way.
+        :param dtype: The data type for the entries of the LD matrix.
+        :param compressor_name: The name of the compressor to use for the LD matrix.
+        :param compression_level: The compression level to use for the LD matrix (1-9).
+
+        :return: An instance of `LDMatrix` containing the computed LD matrix.
+        """
+
+        ld_mat = super().compute(output_dir,
+                                 temp_dir,
+                                 overwrite=overwrite,
+                                 delete_original=delete_original,
+                                 dtype=dtype,
+                                 compressor_name=compressor_name,
+                                 compression_level=compression_level)
+
+        ld_mat.set_store_attr('LD estimator', 'block')
+
+        ld_mat.set_store_attr('Estimator properties', {
+            'LD blocks': self.ld_blocks.tolist()
+        })
+
+        return ld_mat
+
+

__init__(genotype_matrix, ld_blocks=None, ld_blocks_file=None)


Initialize the block-based LD estimator with a genotype matrix and LD blocks.


Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `genotype_matrix` | | The genotype matrix, an instance of `GenotypeMatrix`. | required |
| `ld_blocks` | | The LD blocks, a Bx2 matrix where B is the number of blocks and the columns are the start and end of each block, respectively. | `None` |
| `ld_blocks_file` | | The path to the LD blocks file. | `None` |
Source code in magenpy/stats/ld/estimator.py
def __init__(self,
+             genotype_matrix,
+             ld_blocks=None,
+             ld_blocks_file=None):
+    """
+    Initialize the block-based LD estimator with a genotype matrix and LD blocks.
+
+    :param genotype_matrix: The genotype matrix, an instance of `GenotypeMatrix`.
+    :param ld_blocks: The LD blocks, a Bx2 matrix where B is the number of blocks and the
+    columns are the start and end of each block, respectively.
+    :param ld_blocks_file: The path to the LD blocks file
+    """
+
+    assert ld_blocks_file is not None or ld_blocks is not None
+
+    super().__init__(genotype_matrix=genotype_matrix)
+
+    if ld_blocks is None:
+        from ...parsers.misc_parsers import parse_ld_block_data
+        self.ld_blocks = parse_ld_block_data(ld_blocks_file)[self.genotype_matrix.chromosome]
+    else:
+        # Store user-provided LD blocks directly (otherwise the attribute is never set):
+        self.ld_blocks = ld_blocks
+
+
+

compute(output_dir, temp_dir='temp', overwrite=True, delete_original=True, dtype='int16', compressor_name='lz4', compression_level=5)


Compute the block-based LD matrix and store in Zarr array format.


Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `output_dir` | | The path where to store the resulting LD matrix. | required |
| `temp_dir` | | A temporary directory to store intermediate files and results. | `'temp'` |
| `overwrite` | | If True, overwrite any existing LD matrices in `temp_dir` and `output_dir`. | `True` |
| `delete_original` | | If True, deletes dense or intermediate LD matrices generated along the way. | `True` |
| `dtype` | | The data type for the entries of the LD matrix. | `'int16'` |
| `compressor_name` | | The name of the compressor to use for the LD matrix. | `'lz4'` |
| `compression_level` | | The compression level to use for the LD matrix (1-9). | `5` |

Returns:

| Type | Description |
| --- | --- |
| | An instance of `LDMatrix` containing the computed LD matrix. |

Source code in magenpy/stats/ld/estimator.py
def compute(self,
+            output_dir,
+            temp_dir='temp',
+            overwrite=True,
+            delete_original=True,
+            dtype='int16',
+            compressor_name='lz4',
+            compression_level=5):
+    """
+
+    Compute the block-based LD matrix and store in Zarr array format.
+
+    :param output_dir: The path where to store the resulting LD matrix.
+    :param temp_dir: A temporary directory to store intermediate files and results.
+    :param overwrite: If True, overwrite any existing LD matrices in `temp_dir` and `output_dir`.
+    :param delete_original: If True, deletes dense or intermediate LD matrices generated along the way.
+    :param dtype: The data type for the entries of the LD matrix.
+    :param compressor_name: The name of the compressor to use for the LD matrix.
+    :param compression_level: The compression level to use for the LD matrix (1-9).
+
+    :return: An instance of `LDMatrix` containing the computed LD matrix.
+    """
+
+    ld_mat = super().compute(output_dir,
+                             temp_dir,
+                             overwrite=overwrite,
+                             delete_original=delete_original,
+                             dtype=dtype,
+                             compressor_name=compressor_name,
+                             compression_level=compression_level)
+
+    ld_mat.set_store_attr('LD estimator', 'block')
+
+    ld_mat.set_store_attr('Estimator properties', {
+        'LD blocks': self.ld_blocks.tolist()
+    })
+
+    return ld_mat
+
+
+

compute_ld_boundaries()


Compute the per-SNP Linkage-Disequilibrium (LD) boundaries for the block-based estimator.


Returns:

| Type | Description |
| --- | --- |
| | A 2xM matrix of LD boundaries. |

Source code in magenpy/stats/ld/estimator.py
def compute_ld_boundaries(self):
+    """
+    Compute the per-SNP Linkage-Disequilibrium (LD) boundaries for the block-based estimator.
+
+    :return: A 2xM matrix of LD boundaries.
+    """
+
+    from .c_utils import find_ld_block_boundaries
+    return find_ld_block_boundaries(self.genotype_matrix.bp_pos, self.ld_blocks)
+
+
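`find_ld_block_boundaries` is a compiled (Cython) helper. For intuition, a rough pure-NumPy equivalent of what it computes might look like the sketch below, under the assumption that blocks are half-open [start, end) intervals in base-pair coordinates:

```python
import numpy as np

def find_ld_block_boundaries_np(bp_pos, ld_blocks):
    """For each SNP, return the [start, end) index range of the SNPs
    sharing its LD block (a sketch, not the compiled helper)."""
    m = len(bp_pos)
    bounds = np.zeros((2, m), dtype=np.int64)
    for start, end in ld_blocks:
        # Indices of SNPs whose base-pair position falls inside this block:
        idx = np.where((bp_pos >= start) & (bp_pos < end))[0]
        if idx.size > 0:
            bounds[0, idx] = idx[0]       # first SNP index in the block
            bounds[1, idx] = idx[-1] + 1  # one past the last SNP index
    return bounds
```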
+
+ +
+ + + +
+ +
+ + +
+ +
+ + + +

SampleLD

Bases: object


A basic wrapper class to facilitate computing Linkage-Disequilibrium (LD) matrices.

Linkage-Disequilibrium (LD) is a measure of the SNP-by-SNP pairwise correlation between genetic variants in a population. LD tends to decay with genomic distance, and the rate of decay is influenced by many factors. Therefore, LD matrices are often diagonally-dominant.

This class, SampleLD, provides a basic interface to compute the sample correlation coefficients between all variants defined in a genotype matrix. The resulting LD matrix is square and dense.

For sparse LD matrices, consider using the WindowedLD, ShrinkageLD or BlockLD estimators instead.

See Also: WindowedLD, ShrinkageLD, BlockLD

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `genotype_matrix` | | The genotype matrix, an instance of `GenotypeMatrix` or its children. |

Source code in magenpy/stats/ld/estimator.py
class SampleLD(object):
+    """
+    A basic wrapper class to facilitate computing Linkage-Disequilibrium (LD) matrices.
+
+    Linkage-Disequilibrium (LD) is a measure of the SNP-by-SNP pairwise correlation between
+    genetic variants in a population. LD tends to decay with genomic distance, and the rate
+    of decay is influenced by many factors. Therefore, LD matrices are often diagonally-dominant.
+
+    This class `SampleLD` provides a basic interface to compute sample correlation coefficient between
+     all variants defined in a genotype matrix. The resulting LD matrix is a square and dense matrix.
+
+     For sparse LD matrices, consider using the `WindowedLD`, `ShrinkageLD` or `BlockLD` estimators instead.
+
+     !!! seealso "See Also"
+        * [WindowedLD][magenpy.stats.ld.estimator.WindowedLD]
+        * [ShrinkageLD][magenpy.stats.ld.estimator.ShrinkageLD]
+        * [BlockLD][magenpy.stats.ld.estimator.BlockLD]
+
+     :ivar genotype_matrix: The genotype matrix, an instance of `GenotypeMatrix` or its children.
+
+    """
+
+    def __init__(self, genotype_matrix):
+        """
+        Initialize the LD estimator with a genotype matrix.
+        :param genotype_matrix: The genotype matrix, an instance of `GenotypeMatrix`.
+        """
+
+        self.genotype_matrix = genotype_matrix
+
+        # Ensure that the genotype matrix has data for a single chromosome only:
+        if self.genotype_matrix.chromosome is None:
+            raise Exception("We do not support computing inter-chromosomal LD matrices! "
+                            "You may need to split the genotype matrix by chromosome. "
+                            "See GenotypeMatrix.split_by_chromosome().")
+
+    def compute_ld_boundaries(self):
+        """
+         Compute the Linkage-Disequilibrium (LD) boundaries. LD boundaries define the window
+         for which we compute the correlation coefficient between the focal SNP and all other SNPs in
+         the genome. Typically, this window is local, since the LD decays exponentially with
+         genomic distance.
+
+         The LD boundaries are a 2xM matrix, where M is the number of SNPs on the chromosome.
+         The first row contains the start position for the window and the second row contains
+         the end position.
+
+         For the sample LD matrix, we simply take the entire square matrix as our window,
+         so the start position is 0 and end position is M for all SNPs.
+
+         :return: A 2xM matrix of LD boundaries.
+        """
+        m = self.genotype_matrix.n_snps
+        return np.array((np.zeros(m), np.ones(m)*m)).astype(np.int64)
+
+    def compute(self,
+                output_dir,
+                temp_dir='temp',
+                overwrite=True,
+                delete_original=True,
+                dtype='int16',
+                compressor_name='lz4',
+                compression_level=5):
+        """
+        A utility method to compute the LD matrix and store in Zarr array format.
+        This method computes the LD matrix, stores it in Zarr array format, sets its attributes,
+        and performs simple validation at the end.
+
+        :param output_dir: The path where to store the resulting LD matrix.
+        :param temp_dir: A temporary directory to store intermediate files and results.
+        :param overwrite: If True, overwrite any existing LD matrices in `temp_dir` and `output_dir`.
+        :param delete_original: If True, deletes dense or intermediate LD matrices generated along the way.
+        :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+        and integer quantized data types int8 and int16).
+        :param compressor_name: The name of the compressor to use for the LD matrix.
+        :param compression_level: The compression level to use for the LD matrix (1-9).
+
+        :return: An instance of `LDMatrix` containing the computed LD matrix.
+
+        """
+
+        from .utils import compute_ld_xarray, compute_ld_plink1p9
+        from ...GenotypeMatrix import xarrayGenotypeMatrix, plinkBEDGenotypeMatrix
+
+        assert str(dtype) in ('float32', 'float64', 'int8', 'int16')
+
+        ld_boundaries = self.compute_ld_boundaries()
+
+        if isinstance(self.genotype_matrix, xarrayGenotypeMatrix):
+            ld_mat = compute_ld_xarray(self.genotype_matrix,
+                                       ld_boundaries,
+                                       output_dir,
+                                       temp_dir=temp_dir,
+                                       overwrite=overwrite,
+                                       delete_original=delete_original,
+                                       dtype=dtype,
+                                       compressor_name=compressor_name,
+                                       compression_level=compression_level)
+        elif isinstance(self.genotype_matrix, plinkBEDGenotypeMatrix):
+            ld_mat = compute_ld_plink1p9(self.genotype_matrix,
+                                         ld_boundaries,
+                                         output_dir,
+                                         temp_dir=temp_dir,
+                                         overwrite=overwrite,
+                                         dtype=dtype,
+                                         compressor_name=compressor_name,
+                                         compression_level=compression_level)
+        else:
+            raise NotImplementedError
+
+        # Add attributes to the LDMatrix object:
+        ld_mat.set_store_attr('Chromosome', int(self.genotype_matrix.chromosome))
+        ld_mat.set_store_attr('Sample size', int(self.genotype_matrix.sample_size))
+        ld_mat.set_store_attr('LD estimator', 'sample')
+
+        if self.genotype_matrix.genome_build is not None:
+            ld_mat.set_store_attr('Genome build', self.genotype_matrix.genome_build)
+
+        ld_mat.set_metadata('snps', self.genotype_matrix.snps, overwrite=overwrite)
+        ld_mat.set_metadata('bp', self.genotype_matrix.bp_pos, overwrite=overwrite)
+        ld_mat.set_metadata('maf', self.genotype_matrix.maf, overwrite=overwrite)
+        ld_mat.set_metadata('a1', self.genotype_matrix.a1, overwrite=overwrite)
+        ld_mat.set_metadata('a2', self.genotype_matrix.a2, overwrite=overwrite)
+
+        try:
+            ld_mat.set_metadata('cm', self.genotype_matrix.cm_pos, overwrite=overwrite)
+        except KeyError:
+            pass
+
+        ld_mat.set_metadata('ldscore', ld_mat.compute_ld_scores(), overwrite=overwrite)
+
+        if ld_mat.validate_ld_matrix():
+            return ld_mat
+
+
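A minimal usage sketch for this estimator; `g_mat` stands in for a single-chromosome `GenotypeMatrix` instance (how you construct it depends on your data and backend):

```python
# Sketch: compute a dense sample LD matrix for one chromosome.
from magenpy.stats.ld.estimator import SampleLD

estimator = SampleLD(g_mat)   # g_mat: single-chromosome GenotypeMatrix (assumed)
ld_mat = estimator.compute(
    output_dir='ld_output',   # where the Zarr-backed LDMatrix is written
    temp_dir='temp',
    dtype='int16',            # quantized storage to reduce disk usage
)
print(ld_mat.compute_ld_scores()[:5])
```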

__init__(genotype_matrix)


Initialize the LD estimator with a genotype matrix.


Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `genotype_matrix` | | The genotype matrix, an instance of `GenotypeMatrix`. | required |
Source code in magenpy/stats/ld/estimator.py
def __init__(self, genotype_matrix):
+    """
+    Initialize the LD estimator with a genotype matrix.
+    :param genotype_matrix: The genotype matrix, an instance of `GenotypeMatrix`.
+    """
+
+    self.genotype_matrix = genotype_matrix
+
+    # Ensure that the genotype matrix has data for a single chromosome only:
+    if self.genotype_matrix.chromosome is None:
+        raise Exception("We do not support computing inter-chromosomal LD matrices! "
+                        "You may need to split the genotype matrix by chromosome. "
+                        "See GenotypeMatrix.split_by_chromosome().")
+
+
+

compute(output_dir, temp_dir='temp', overwrite=True, delete_original=True, dtype='int16', compressor_name='lz4', compression_level=5)


A utility method to compute the LD matrix and store it in Zarr array format. This method computes the LD matrix, stores it in Zarr array format, sets its attributes, and performs simple validation at the end.


Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `output_dir` | | The path where to store the resulting LD matrix. | required |
| `temp_dir` | | A temporary directory to store intermediate files and results. | `'temp'` |
| `overwrite` | | If True, overwrite any existing LD matrices in `temp_dir` and `output_dir`. | `True` |
| `delete_original` | | If True, deletes dense or intermediate LD matrices generated along the way. | `True` |
| `dtype` | | The data type for the entries of the LD matrix (supported data types are float32, float64 and the quantized integer types int8 and int16). | `'int16'` |
| `compressor_name` | | The name of the compressor to use for the LD matrix. | `'lz4'` |
| `compression_level` | | The compression level to use for the LD matrix (1-9). | `5` |

Returns:

| Type | Description |
| --- | --- |
| | An instance of `LDMatrix` containing the computed LD matrix. |

Source code in magenpy/stats/ld/estimator.py
def compute(self,
+            output_dir,
+            temp_dir='temp',
+            overwrite=True,
+            delete_original=True,
+            dtype='int16',
+            compressor_name='lz4',
+            compression_level=5):
+    """
+    A utility method to compute the LD matrix and store in Zarr array format.
+    This method computes the LD matrix, stores it in Zarr array format, sets its attributes,
+    and performs simple validation at the end.
+
+    :param output_dir: The path where to store the resulting LD matrix.
+    :param temp_dir: A temporary directory to store intermediate files and results.
+    :param overwrite: If True, overwrite any existing LD matrices in `temp_dir` and `output_dir`.
+    :param delete_original: If True, deletes dense or intermediate LD matrices generated along the way.
+    :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+    and integer quantized data types int8 and int16).
+    :param compressor_name: The name of the compressor to use for the LD matrix.
+    :param compression_level: The compression level to use for the LD matrix (1-9).
+
+    :return: An instance of `LDMatrix` containing the computed LD matrix.
+
+    """
+
+    from .utils import compute_ld_xarray, compute_ld_plink1p9
+    from ...GenotypeMatrix import xarrayGenotypeMatrix, plinkBEDGenotypeMatrix
+
+    assert str(dtype) in ('float32', 'float64', 'int8', 'int16')
+
+    ld_boundaries = self.compute_ld_boundaries()
+
+    if isinstance(self.genotype_matrix, xarrayGenotypeMatrix):
+        ld_mat = compute_ld_xarray(self.genotype_matrix,
+                                   ld_boundaries,
+                                   output_dir,
+                                   temp_dir=temp_dir,
+                                   overwrite=overwrite,
+                                   delete_original=delete_original,
+                                   dtype=dtype,
+                                   compressor_name=compressor_name,
+                                   compression_level=compression_level)
+    elif isinstance(self.genotype_matrix, plinkBEDGenotypeMatrix):
+        ld_mat = compute_ld_plink1p9(self.genotype_matrix,
+                                     ld_boundaries,
+                                     output_dir,
+                                     temp_dir=temp_dir,
+                                     overwrite=overwrite,
+                                     dtype=dtype,
+                                     compressor_name=compressor_name,
+                                     compression_level=compression_level)
+    else:
+        raise NotImplementedError
+
+    # Add attributes to the LDMatrix object:
+    ld_mat.set_store_attr('Chromosome', int(self.genotype_matrix.chromosome))
+    ld_mat.set_store_attr('Sample size', int(self.genotype_matrix.sample_size))
+    ld_mat.set_store_attr('LD estimator', 'sample')
+
+    if self.genotype_matrix.genome_build is not None:
+        ld_mat.set_store_attr('Genome build', self.genotype_matrix.genome_build)
+
+    ld_mat.set_metadata('snps', self.genotype_matrix.snps, overwrite=overwrite)
+    ld_mat.set_metadata('bp', self.genotype_matrix.bp_pos, overwrite=overwrite)
+    ld_mat.set_metadata('maf', self.genotype_matrix.maf, overwrite=overwrite)
+    ld_mat.set_metadata('a1', self.genotype_matrix.a1, overwrite=overwrite)
+    ld_mat.set_metadata('a2', self.genotype_matrix.a2, overwrite=overwrite)
+
+    try:
+        ld_mat.set_metadata('cm', self.genotype_matrix.cm_pos, overwrite=overwrite)
+    except KeyError:
+        pass
+
+    ld_mat.set_metadata('ldscore', ld_mat.compute_ld_scores(), overwrite=overwrite)
+
+    if ld_mat.validate_ld_matrix():
+        return ld_mat
+
+
+

compute_ld_boundaries()


Compute the Linkage-Disequilibrium (LD) boundaries. LD boundaries define the window for which we compute the correlation coefficient between the focal SNP and all other SNPs in the genome. Typically, this window is local, since the LD decays exponentially with genomic distance.

The LD boundaries are a 2xM matrix, where M is the number of SNPs on the chromosome. The first row contains the start position for the window and the second row contains the end position.

For the sample LD matrix, we simply take the entire square matrix as our window, so the start position is 0 and end position is M for all SNPs.


Returns:

| Type | Description |
| --- | --- |
| | A 2xM matrix of LD boundaries. |

Source code in magenpy/stats/ld/estimator.py
def compute_ld_boundaries(self):
+    """
+     Compute the Linkage-Disequilibrium (LD) boundaries. LD boundaries define the window
+     for which we compute the correlation coefficient between the focal SNP and all other SNPs in
+     the genome. Typically, this window is local, since the LD decays exponentially with
+     genomic distance.
+
+     The LD boundaries are a 2xM matrix, where M is the number of SNPs on the chromosome.
+     The first row contains the start position for the window and the second row contains
+     the end position.
+
+     For the sample LD matrix, we simply take the entire square matrix as our window,
+     so the start position is 0 and end position is M for all SNPs.
+
+     :return: A 2xM matrix of LD boundaries.
+    """
+    m = self.genotype_matrix.n_snps
+    return np.array((np.zeros(m), np.ones(m)*m)).astype(np.int64)
+
+
+

ShrinkageLD

Bases: SampleLD


A wrapper class to facilitate computing shrinkage-based Linkage-Disequilibrium (LD) matrices. Shrinkage LD matrices are a way to reduce noise in the LD matrix by shrinking the off-diagonal pairwise correlation coefficients towards zero. This is useful for reducing the noise in the LD matrix and improving the quality of downstream analyses.

The shrinkage estimator implemented uses the shrinking procedure derived in:

Wen X, Stephens M. USING LINEAR PREDICTORS TO IMPUTE ALLELE FREQUENCIES FROM SUMMARY OR POOLED GENOTYPE DATA. Ann Appl Stat. 2010 Sep;4(3):1158-1182. doi: 10.1214/10-aoas338. PMID: 21479081; PMCID: PMC3072818.

Computing the shrinkage intensity requires specifying the effective population size (Ne) and the sample size used to infer the genetic map. In addition, it requires specifying a threshold below which the LD is set to zero.

See Also: WindowedLD, BlockLD

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `genotype_matrix` | | The genotype matrix, an instance of `GenotypeMatrix`. |
| `genetic_map_ne` | | The effective population size (Ne) from which the genetic map is derived. |
| `genetic_map_sample_size` | | The sample size used to infer the genetic map. |
| `threshold` | | The shrinkage cutoff below which the LD is set to zero. |
Source code in magenpy/stats/ld/estimator.py
class ShrinkageLD(SampleLD):
+    """
+    A wrapper class to facilitate computing shrinkage-based Linkage-Disequilibrium (LD) matrices.
+    Shrinkage LD matrices are a way to reduce noise in the LD matrix by shrinking the off-diagonal pairwise
+    correlation coefficients towards zero. This is useful for reducing the noise in the LD matrix and
+    improving the quality of downstream analyses.
+
+    The shrinkage estimator implemented uses the shrinking procedure derived in:
+
+    Wen X, Stephens M. USING LINEAR PREDICTORS TO IMPUTE ALLELE FREQUENCIES FROM SUMMARY OR POOLED GENOTYPE DATA.
+    Ann Appl Stat. 2010 Sep;4(3):1158-1182. doi: 10.1214/10-aoas338. PMID: 21479081; PMCID: PMC3072818.
+
+    Computing the shrinkage intensity requires specifying the effective population size (Ne) and the sample size
+    used to infer the genetic map. In addition, it requires specifying a threshold below which the LD is set to zero.
+
+    !!! seealso "See Also"
+        * [WindowedLD][magenpy.stats.ld.estimator.WindowedLD]
+        * [BlockLD][magenpy.stats.ld.estimator.BlockLD]
+
+    :ivar genotype_matrix: The genotype matrix, an instance of `GenotypeMatrix`.
+    :ivar genetic_map_ne: The effective population size (Ne) from which the genetic map is derived.
+    :ivar genetic_map_sample_size: The sample size used to infer the genetic map.
+    :ivar threshold: The shrinkage cutoff below which the LD is set to zero.
+
+    """
+
+    def __init__(self,
+                 genotype_matrix,
+                 genetic_map_ne,
+                 genetic_map_sample_size,
+                 threshold=1e-3):
+        """
+
+        Initialize the shrinkage LD estimator with a genotype matrix and shrinkage parameters.
+
+        :param genotype_matrix: The genotype matrix, an instance of `GenotypeMatrix`.
+        :param genetic_map_ne: The effective population size (Ne) from which the genetic map is derived.
+        :param genetic_map_sample_size: The sample size used to infer the genetic map.
+        :param threshold: The shrinkage cutoff below which the LD is set to zero.
+        """
+
+        super().__init__(genotype_matrix=genotype_matrix)
+
+        self.genetic_map_ne = genetic_map_ne
+        self.genetic_map_sample_size = genetic_map_sample_size
+        self.threshold = threshold
+
+    def compute_ld_boundaries(self):
+        """
+        Compute the shrinkage-based Linkage-Disequilibrium (LD) boundaries.
+
+        :return: A 2xM matrix of LD boundaries.
+        """
+
+        from .c_utils import find_shrinkage_ld_boundaries
+        return find_shrinkage_ld_boundaries(self.genotype_matrix.cm_pos,
+                                            self.genetic_map_ne,
+                                            self.genetic_map_sample_size,
+                                            self.threshold)
+
+    def compute(self,
+                output_dir,
+                temp_dir='temp',
+                overwrite=True,
+                delete_original=True,
+                dtype='int16',
+                compressor_name='lz4',
+                compression_level=5,
+                chunk_size=1000):
+        """
+
+        TODO: Add a mechanism to either automatically adjust the shrinkage threshold depending on the
+        float precision (dtype) or purge trailing zero entries that got quantized to zero. For example,
+        if we select a shrinkage threshold of 1e-3 with (int8), then we will have a lot of
+        trailing zeros stored in the resulting LD matrix. It's better if we got rid of those zeros to
+        minimize storage requirements and computation time.
+
+        !!! note
+            LD Scores are computed before applying shrinkage.
+
+        :param output_dir: The path where to store the resulting LD matrix.
+        :param temp_dir: A temporary directory to store intermediate files and results.
+        :param overwrite: If True, overwrite any existing LD matrices in `temp_dir` and `output_dir`.
+        :param delete_original: If True, deletes dense or intermediate LD matrices generated along the way.
+        :param dtype: The data type for the entries of the LD matrix.
+        :param compressor_name: The name of the compressor to use for the LD matrix.
+        :param compression_level: The compression level to use for the LD matrix (1-9).
+        :param chunk_size: An optional parameter that sets the maximum number of rows processed simultaneously.
+        The smaller the `chunk_size`, the less memory requirements needed for the shrinkage step.
+
+        :return: An instance of `LDMatrix` containing the computed LD matrix.
+
+        """
+
+        ld_mat = super().compute(output_dir,
+                                 temp_dir,
+                                 overwrite=overwrite,
+                                 delete_original=delete_original,
+                                 dtype=dtype,
+                                 compressor_name=compressor_name,
+                                 compression_level=compression_level)
+
+        from .utils import shrink_ld_matrix
+
+        ld_mat = shrink_ld_matrix(ld_mat,
+                                  self.genotype_matrix.cm_pos,
+                                  self.genotype_matrix.maf_var,
+                                  self.genetic_map_ne,
+                                  self.genetic_map_sample_size,
+                                  self.threshold,
+                                  chunk_size=chunk_size)
+
+        ld_mat.set_store_attr('LD estimator', 'shrinkage')
+
+        ld_mat.set_store_attr('Estimator properties', {
+                    'Genetic map Ne': self.genetic_map_ne,
+                    'Genetic map sample size': self.genetic_map_sample_size,
+                    'Threshold': self.threshold
+                })
+
+        return ld_mat
+
+
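For intuition, the Wen and Stephens (2010) procedure multiplies each off-diagonal correlation by a factor that decays with genetic distance. The sketch below is one reading of that weighting; it is an illustration, not magenpy's exact implementation, and the Ne and map sample-size values shown are common illustrative choices:

```python
import numpy as np

def shrinkage_weight(cm_dist, genetic_map_ne, genetic_map_sample_size,
                     threshold=1e-3):
    """Wen & Stephens-style shrinkage factor for two SNPs separated
    by `cm_dist` centimorgans (illustrative sketch only)."""
    # Population-scaled recombination distance between the two SNPs:
    rho = 4. * genetic_map_ne * (cm_dist / 100.)
    weight = np.exp(-rho / (2. * genetic_map_sample_size))
    # Weights below the cutoff zero out the corresponding LD entries:
    return np.where(weight >= threshold, weight, 0.)

# Example with illustrative parameter values (Ne=11400, map sample size=183):
print(shrinkage_weight(np.array([0.01, 0.1, 1.0]), 11400, 183))
```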

__init__(genotype_matrix, genetic_map_ne, genetic_map_sample_size, threshold=0.001)


Initialize the shrinkage LD estimator with a genotype matrix and shrinkage parameters.


Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `genotype_matrix` | | The genotype matrix, an instance of `GenotypeMatrix`. | required |
| `genetic_map_ne` | | The effective population size (Ne) from which the genetic map is derived. | required |
| `genetic_map_sample_size` | | The sample size used to infer the genetic map. | required |
| `threshold` | | The shrinkage cutoff below which the LD is set to zero. | `0.001` |
Source code in magenpy/stats/ld/estimator.py
def __init__(self,
+             genotype_matrix,
+             genetic_map_ne,
+             genetic_map_sample_size,
+             threshold=1e-3):
+    """
+
+    Initialize the shrinkage LD estimator with a genotype matrix and shrinkage parameters.
+
+    :param genotype_matrix: The genotype matrix, an instance of `GenotypeMatrix`.
+    :param genetic_map_ne: The effective population size (Ne) from which the genetic map is derived.
+    :param genetic_map_sample_size: The sample size used to infer the genetic map.
+    :param threshold: The shrinkage cutoff below which the LD is set to zero.
+    """
+
+    super().__init__(genotype_matrix=genotype_matrix)
+
+    self.genetic_map_ne = genetic_map_ne
+    self.genetic_map_sample_size = genetic_map_sample_size
+    self.threshold = threshold
+
+
+

compute(output_dir, temp_dir='temp', overwrite=True, delete_original=True, dtype='int16', compressor_name='lz4', compression_level=5, chunk_size=1000)


TODO: Add a mechanism to either automatically adjust the shrinkage threshold depending on the float precision (dtype) or purge trailing zero entries that got quantized to zero. For example, if we select a shrinkage threshold of 1e-3 with int8 storage, the resulting LD matrix will contain many trailing zeros. It would be better to get rid of those zeros to minimize storage requirements and computation time.

Note: LD Scores are computed before applying shrinkage.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `output_dir` | | The path where to store the resulting LD matrix. | required |
| `temp_dir` | | A temporary directory to store intermediate files and results. | `'temp'` |
| `overwrite` | | If True, overwrite any existing LD matrices in `temp_dir` and `output_dir`. | `True` |
| `delete_original` | | If True, deletes dense or intermediate LD matrices generated along the way. | `True` |
| `dtype` | | The data type for the entries of the LD matrix. | `'int16'` |
| `compressor_name` | | The name of the compressor to use for the LD matrix. | `'lz4'` |
| `compression_level` | | The compression level to use for the LD matrix (1-9). | `5` |
| `chunk_size` | | An optional parameter that sets the maximum number of rows processed simultaneously. The smaller the `chunk_size`, the lower the memory requirements of the shrinkage step. | `1000` |

Returns:

| Type | Description |
| --- | --- |
| | An instance of `LDMatrix` containing the computed LD matrix. |

Source code in magenpy/stats/ld/estimator.py
def compute(self,
+            output_dir,
+            temp_dir='temp',
+            overwrite=True,
+            delete_original=True,
+            dtype='int16',
+            compressor_name='lz4',
+            compression_level=5,
+            chunk_size=1000):
+    """
+
+    TODO: Add a mechanism to either automatically adjust the shrinkage threshold depending on the
+    float precision (dtype) or purge trailing zero entries that got quantized to zero. For example,
+    if we select a shrinkage threshold of 1e-3 with (int8), then we will have a lot of
+    trailing zeros stored in the resulting LD matrix. It's better if we got rid of those zeros to
+    minimize storage requirements and computation time.
+
+    !!! note
+        LD Scores are computed before applying shrinkage.
+
+    :param output_dir: The path where to store the resulting LD matrix.
+    :param temp_dir: A temporary directory to store intermediate files and results.
+    :param overwrite: If True, overwrite any existing LD matrices in `temp_dir` and `output_dir`.
+    :param delete_original: If True, deletes dense or intermediate LD matrices generated along the way.
+    :param dtype: The data type for the entries of the LD matrix.
+    :param compressor_name: The name of the compressor to use for the LD matrix.
+    :param compression_level: The compression level to use for the LD matrix (1-9).
+    :param chunk_size: An optional parameter that sets the maximum number of rows processed simultaneously.
+    The smaller the `chunk_size`, the less memory requirements needed for the shrinkage step.
+
+    :return: An instance of `LDMatrix` containing the computed LD matrix.
+
+    """
+
+    ld_mat = super().compute(output_dir,
+                             temp_dir,
+                             overwrite=overwrite,
+                             delete_original=delete_original,
+                             dtype=dtype,
+                             compressor_name=compressor_name,
+                             compression_level=compression_level)
+
+    from .utils import shrink_ld_matrix
+
+    ld_mat = shrink_ld_matrix(ld_mat,
+                              self.genotype_matrix.cm_pos,
+                              self.genotype_matrix.maf_var,
+                              self.genetic_map_ne,
+                              self.genetic_map_sample_size,
+                              self.threshold,
+                              chunk_size=chunk_size)
+
+    ld_mat.set_store_attr('LD estimator', 'shrinkage')
+
+    ld_mat.set_store_attr('Estimator properties', {
+                'Genetic map Ne': self.genetic_map_ne,
+                'Genetic map sample size': self.genetic_map_sample_size,
+                'Threshold': self.threshold
+            })
+
+    return ld_mat
+
+
+

compute_ld_boundaries()


Compute the shrinkage-based Linkage-Disequilibrium (LD) boundaries.


Returns:

| Type | Description |
| --- | --- |
| | A 2xM matrix of LD boundaries. |

Source code in magenpy/stats/ld/estimator.py
def compute_ld_boundaries(self):
+    """
+    Compute the shrinkage-based Linkage-Disequilibrium (LD) boundaries.
+
+    :return: A 2xM matrix of LD boundaries.
+    """
+
+    from .c_utils import find_shrinkage_ld_boundaries
+    return find_shrinkage_ld_boundaries(self.genotype_matrix.cm_pos,
+                                        self.genetic_map_ne,
+                                        self.genetic_map_sample_size,
+                                        self.threshold)
+
+
+

WindowedLD

Bases: SampleLD


A wrapper class to facilitate computing windowed Linkage-Disequilibrium (LD) matrices. Windowed LD matrices only record pairwise correlations between variants that are within a certain distance of each other along the chromosome. This is useful for reducing the memory requirements and noise in the LD matrix.

The WindowedLD estimator supports a variety of ways for defining the window size:

* `window_size`: The number of neighboring SNPs to consider on each side when computing LD.
* `kb_window_size`: The maximum distance in kilobases to consider when computing LD.
* `cm_window_size`: The maximum distance in centimorgans to consider when computing LD.

The LD boundaries computed here are the intersection of the windows defined by the window size around each SNP (`window_size`), the window size in kilobases (`kb_window_size`), and the window size in centimorgans (`cm_window_size`), as illustrated in the sketch below.

See Also: ShrinkageLD, BlockLD
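To make the intersection concrete, here is a small NumPy sketch of how per-SNP windows from two of the criteria can be combined into a single 2xM boundary matrix (a simplified illustration, not the package's compiled implementation; it assumes `bp_pos` is sorted):

```python
import numpy as np

def windowed_boundaries(bp_pos, window_size=None, kb_window_size=None):
    """Intersect SNP-count and kb-based windows into 2xM boundaries (sketch)."""
    m = len(bp_pos)
    idx = np.arange(m)
    starts = [np.zeros(m, dtype=np.int64)]
    ends = [np.full(m, m, dtype=np.int64)]

    if window_size is not None:
        # Window of `window_size` neighboring SNPs on each side:
        starts.append(np.clip(idx - window_size, 0, m))
        ends.append(np.clip(idx + window_size + 1, 0, m))

    if kb_window_size is not None:
        bp = np.asarray(bp_pos)
        dist = kb_window_size * 1000
        # For sorted positions, searchsorted gives the window edges:
        starts.append(np.searchsorted(bp, bp - dist, side='left'))
        ends.append(np.searchsorted(bp, bp + dist, side='right'))

    # The intersection keeps the tightest start/end across all criteria:
    return np.array([np.maximum.reduce(starts), np.minimum.reduce(ends)])

bp = np.array([100, 500, 1500, 2000, 9000])
print(windowed_boundaries(bp, window_size=2, kb_window_size=1))
```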

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `genotype_matrix` | | The genotype matrix, an instance of `GenotypeMatrix`. |
| `window_size` | | The number of neighboring SNPs to consider on each side when computing LD. |
| `kb_window_size` | | The maximum distance in kilobases to consider when computing LD. |
| `cm_window_size` | | The maximum distance in centimorgans to consider when computing LD. |
+ +
+ Source code in magenpy/stats/ld/estimator.py +
class WindowedLD(SampleLD):
+    """
+    A wrapper class to facilitate computing windowed Linkage-Disequilibrium (LD) matrices.
+    Windowed LD matrices only record pairwise correlations between variants that are within a certain
+    distance of each other along the chromosome. This is useful for reducing the memory requirements
+    and noise in the LD matrix.
+
+    The `WindowedLD` estimator supports a variety of ways for defining the window size:
+
+    * `window_size`: The number of neighboring SNPs to consider on each side when computing LD.
+    * `kb_window_size`: The maximum distance in kilobases to consider when computing LD.
+    * `cm_window_size`: The maximum distance in centi Morgan to consider when computing LD.
+
+    The LD boundaries computed here are the intersection of the windows defined by the window size around
+    each SNP (`window_size`), the window size in kilobases (`kb_window_size`), and the window size in centi Morgan
+    (`cm_window_size`).
+
+    !!! seealso "See Also"
+        * [ShrinkageLD][magenpy.stats.ld.estimator.ShrinkageLD]
+        * [BlockLD][magenpy.stats.ld.estimator.BlockLD]
+
+    :ivar genotype_matrix: The genotype matrix, an instance of `GenotypeMatrix`.
+    :ivar window_size: The number of neighboring SNPs to consider on each side when computing LD.
+    :ivar kb_window_size: The maximum distance in kilobases to consider when computing LD.
+    :ivar cm_window_size: The maximum distance in centi Morgan to consider when computing LD.
+
+    """
+
+    def __init__(self,
+                 genotype_matrix,
+                 window_size=None,
+                 kb_window_size=None,
+                 cm_window_size=None):
+        """
+
+        Initialize the windowed LD estimator with a genotype matrix and window size parameters.
+
+        :param genotype_matrix: The genotype matrix, an instance of `GenotypeMatrix`.
+        :param window_size: The number of neighboring SNPs to consider on each side when computing LD.
+        :param kb_window_size: The maximum distance in kilobases to consider when computing LD.
+        :param cm_window_size: The maximum distance in centi Morgan to consider when computing LD.
+        """
+
+        super().__init__(genotype_matrix=genotype_matrix)
+
+        assert not all([w is None for w in (window_size, kb_window_size, cm_window_size)]), \
+            "Please specify at least one of `window_size`, `kb_window_size`, or `cm_window_size`."
+
+        self.window_size = window_size
+        self.kb_window_size = kb_window_size
+        self.cm_window_size = cm_window_size
+
+    def compute_ld_boundaries(self):
+        """
+        Compute the windowed Linkage-Disequilibrium (LD) boundaries.
+        The LD boundaries computed here are the intersection of the windows defined by
+        the window size around each SNP (`window_size`), the window size in kilobases (`kb_window_size`),
+        and the window size in centi Morgan (`cm_window_size`).
+
+        :return: A 2xM matrix of LD boundaries.
+        """
+
+        bounds = []
+
+        m = self.genotype_matrix.n_snps
+        indices = np.arange(m)
+
+        if self.window_size is not None:
+            bounds.append(
+                np.clip(np.array(
+                    [indices - self.window_size,
+                     indices + self.window_size
+                     ]
+                ),  a_min=0, a_max=m)
+            )
+
+        from .c_utils import find_windowed_ld_boundaries
+
+        if self.kb_window_size is not None:
+            bounds.append(
+                find_windowed_ld_boundaries(.001*self.genotype_matrix.bp_pos,
+                                            self.kb_window_size)
+            )
+
+        if self.cm_window_size is not None:
+            bounds.append(
+                find_windowed_ld_boundaries(self.genotype_matrix.cm_pos,
+                                            self.cm_window_size)
+            )
+
+        if len(bounds) == 1:
+            return bounds[0]
+        else:
+            return np.array([
+                np.maximum.reduce([b[0, :] for b in bounds]),
+                np.minimum.reduce([b[1, :] for b in bounds])
+            ])
+
+    def compute(self,
+                output_dir,
+                temp_dir='temp',
+                overwrite=True,
+                delete_original=True,
+                dtype='int16',
+                compressor_name='lz4',
+                compression_level=5):
+        """
+
+        Compute the windowed LD matrix and store in Zarr array format.
+
+        :param output_dir: The path where to store the resulting LD matrix.
+        :param temp_dir: A temporary directory to store intermediate files and results.
+        :param overwrite: If True, overwrite any existing LD matrices in `temp_dir` and `output_dir`.
+        :param delete_original: If True, deletes dense or intermediate LD matrices generated along the way.
+        :param dtype: The data type for the entries of the LD matrix.
+        :param compressor_name: The name of the compressor to use for the LD matrix.
+        :param compression_level: The compression level to use for the LD matrix (1-9).
+
+        :return: An instance of `LDMatrix` containing the computed LD matrix.
+        """
+
+        ld_mat = super().compute(output_dir,
+                                 temp_dir,
+                                 overwrite=overwrite,
+                                 delete_original=delete_original,
+                                 dtype=dtype,
+                                 compressor_name=compressor_name,
+                                 compression_level=compression_level)
+
+        ld_mat.set_store_attr('LD estimator', 'windowed')
+
+        w_properties = {}
+        if self.window_size is not None:
+            w_properties['Window size'] = self.window_size
+
+        if self.kb_window_size is not None:
+            w_properties['Window size (kb)'] = self.kb_window_size
+
+        if self.cm_window_size is not None:
+            w_properties['Window size (cM)'] = self.cm_window_size
+
+        ld_mat.set_store_attr('Estimator properties', w_properties)
+
+        return ld_mat
+
+

__init__(genotype_matrix, window_size=None, kb_window_size=None, cm_window_size=None)

Initialize the windowed LD estimator with a genotype matrix and window size parameters.

Parameters:

  • genotype_matrix (required): The genotype matrix, an instance of GenotypeMatrix.
  • window_size (default: None): The number of neighboring SNPs to consider on each side when computing LD.
  • kb_window_size (default: None): The maximum distance in kilobases to consider when computing LD.
  • cm_window_size (default: None): The maximum distance in centi Morgan to consider when computing LD.

Source code in magenpy/stats/ld/estimator.py:
def __init__(self,
+             genotype_matrix,
+             window_size=None,
+             kb_window_size=None,
+             cm_window_size=None):
+    """
+
+    Initialize the windowed LD estimator with a genotype matrix and window size parameters.
+
+    :param genotype_matrix: The genotype matrix, an instance of `GenotypeMatrix`.
+    :param window_size: The number of neighboring SNPs to consider on each side when computing LD.
+    :param kb_window_size: The maximum distance in kilobases to consider when computing LD.
+    :param cm_window_size: The maximum distance in centi Morgan to consider when computing LD.
+    """
+
+    super().__init__(genotype_matrix=genotype_matrix)
+
+    assert not all([w is None for w in (window_size, kb_window_size, cm_window_size)]), \
+        "Please specify at least one of `window_size`, `kb_window_size`, or `cm_window_size`."
+
+    self.window_size = window_size
+    self.kb_window_size = kb_window_size
+    self.cm_window_size = cm_window_size
+
+

compute(output_dir, temp_dir='temp', overwrite=True, delete_original=True, dtype='int16', compressor_name='lz4', compression_level=5)

Compute the windowed LD matrix and store in Zarr array format.

Parameters:

  • output_dir (required): The path where to store the resulting LD matrix.
  • temp_dir (default: 'temp'): A temporary directory to store intermediate files and results.
  • overwrite (default: True): If True, overwrite any existing LD matrices in temp_dir and output_dir.
  • delete_original (default: True): If True, deletes dense or intermediate LD matrices generated along the way.
  • dtype (default: 'int16'): The data type for the entries of the LD matrix.
  • compressor_name (default: 'lz4'): The name of the compressor to use for the LD matrix.
  • compression_level (default: 5): The compression level to use for the LD matrix (1-9).

Returns:

    An instance of LDMatrix containing the computed LD matrix.

Source code in magenpy/stats/ld/estimator.py:
def compute(self,
+            output_dir,
+            temp_dir='temp',
+            overwrite=True,
+            delete_original=True,
+            dtype='int16',
+            compressor_name='lz4',
+            compression_level=5):
+    """
+
+    Compute the windowed LD matrix and store in Zarr array format.
+
+    :param output_dir: The path where to store the resulting LD matrix.
+    :param temp_dir: A temporary directory to store intermediate files and results.
+    :param overwrite: If True, overwrite any existing LD matrices in `temp_dir` and `output_dir`.
+    :param delete_original: If True, deletes dense or intermediate LD matrices generated along the way.
+    :param dtype: The data type for the entries of the LD matrix.
+    :param compressor_name: The name of the compressor to use for the LD matrix.
+    :param compression_level: The compression level to use for the LD matrix (1-9).
+
+    :return: An instance of `LDMatrix` containing the computed LD matrix.
+    """
+
+    ld_mat = super().compute(output_dir,
+                             temp_dir,
+                             overwrite=overwrite,
+                             delete_original=delete_original,
+                             dtype=dtype,
+                             compressor_name=compressor_name,
+                             compression_level=compression_level)
+
+    ld_mat.set_store_attr('LD estimator', 'windowed')
+
+    w_properties = {}
+    if self.window_size is not None:
+        w_properties['Window size'] = self.window_size
+
+    if self.kb_window_size is not None:
+        w_properties['Window size (kb)'] = self.kb_window_size
+
+    if self.cm_window_size is not None:
+        w_properties['Window size (cM)'] = self.cm_window_size
+
+    ld_mat.set_store_attr('Estimator properties', w_properties)
+
+    return ld_mat
+
+

compute_ld_boundaries()

Compute the windowed Linkage-Disequilibrium (LD) boundaries. The LD boundaries computed here are the intersection of the windows defined by the window size around each SNP (window_size), the window size in kilobases (kb_window_size), and the window size in centi Morgan (cm_window_size).

Returns:

    A 2xM matrix of LD boundaries.

Source code in magenpy/stats/ld/estimator.py:
def compute_ld_boundaries(self):
+    """
+    Compute the windowed Linkage-Disequilibrium (LD) boundaries.
+    The LD boundaries computed here are the intersection of the windows defined by
+    the window size around each SNP (`window_size`), the window size in kilobases (`kb_window_size`),
+    and the window size in centi Morgan (`cm_window_size`).
+
+    :return: A 2xM matrix of LD boundaries.
+    """
+
+    bounds = []
+
+    m = self.genotype_matrix.n_snps
+    indices = np.arange(m)
+
+    if self.window_size is not None:
+        bounds.append(
+            np.clip(np.array(
+                [indices - self.window_size,
+                 indices + self.window_size
+                 ]
+            ),  a_min=0, a_max=m)
+        )
+
+    from .c_utils import find_windowed_ld_boundaries
+
+    if self.kb_window_size is not None:
+        bounds.append(
+            find_windowed_ld_boundaries(.001*self.genotype_matrix.bp_pos,
+                                        self.kb_window_size)
+        )
+
+    if self.cm_window_size is not None:
+        bounds.append(
+            find_windowed_ld_boundaries(self.genotype_matrix.cm_pos,
+                                        self.cm_window_size)
+        )
+
+    if len(bounds) == 1:
+        return bounds[0]
+    else:
+        return np.array([
+            np.maximum.reduce([b[0, :] for b in bounds]),
+            np.minimum.reduce([b[1, :] for b in bounds])
+        ])
\ No newline at end of file

diff --git a/api/stats/ld/utils/index.html b/api/stats/ld/utils/index.html
new file mode 100644
index 0000000..301b4ed
--- /dev/null
+++ b/api/stats/ld/utils/index.html
@@ -0,0 +1,2518 @@

Utils - magenpy

Utils

clump_snps(ldm, statistic=None, rsq_threshold=0.9, extract=True, sort_key=None)

This function takes an LDMatrix object and clumps SNPs based on the statistic vector (usually p-values) and the provided r-squared threshold. If two SNPs have an r-squared greater than the threshold, the SNP with the higher statistic value is excluded.
Parameters:

  • ldm (required): An LDMatrix object.
  • statistic (default: None): A vector of statistics (e.g. p-values) for each SNP that will determine which SNPs to discard.
  • rsq_threshold (default: 0.9): The r^2 threshold to use for filtering variants.
  • extract (default: True): If True, return remaining SNPs. If False, return removed SNPs.
  • sort_key (default: None): The key function for the sorting algorithm that will decide how to sort the statistic. By default, we select the SNP with the minimum value for the statistic (e.g. smaller p-value).

Returns:

    A list of SNP rsIDs that are left after clumping (or discarded if extract=False).

Source code in magenpy/stats/ld/utils.py:
def clump_snps(ldm,
+               statistic=None,
+               rsq_threshold=.9,
+               extract=True,
+               sort_key=None):
+    """
+    This function takes an LDMatrix object and clumps SNPs based
+    on the `stat` vector (usually p-value) and the provided r-squared threshold.
+    If two SNPs have an r-squared greater than the threshold,
+    the SNP with the higher `stat` value is excluded.
+
+    :param ldm: An LDMatrix object
+    :param statistic: A vector of statistics (e.g. p-values) for each SNP that will determine which SNPs to discard.
+    :param rsq_threshold: The r^2 threshold to use for filtering variants.
+    :param extract: If True, return remaining SNPs. If False, return removed SNPs.
+    :param sort_key: The key function for the sorting algorithm that will decide how to sort the `statistic`.
+    By default, we select the SNP with the minimum value for the `statistic` (e.g. smaller p-value).
+
+    :return: A list of SNP rsIDs that are left after clumping (or discarded if `extract=False`).
+    """
+
+    snps = ldm.snps
+
+    if statistic is None:
+        # if a statistic is not provided, then clump SNPs based on their base pair order,
+        # meaning that if two SNPs are highly correlated, we keep the one with smaller base pair position.
+        statistic = ldm.bp_position
+    else:
+        assert len(statistic) == len(snps)
+
+    if sort_key is None:
+        # By default, keep the SNP with the smallest value of the statistic:
+        sort_key = lambda value: value
+
+    # Sort SNP indices by the (transformed) value of their statistic.
+    # Note: binding the lambda to a fresh argument avoids the infinite recursion
+    # caused by rebinding `sort_key` to a lambda that calls `sort_key`:
+    sorted_idx = sorted(range(len(ldm)), key=lambda idx: sort_key(statistic[idx]))
+
+    keep_snps_dict = dict(zip(snps, np.ones(len(snps), dtype=bool)))
+
+    for idx in sorted_idx:
+
+        if not keep_snps_dict[snps[idx]]:
+            continue
+
+        r, indices = ldm.get_row(idx, return_indices=True)
+        # Find the SNPs that we need to remove:
+        # We remove SNPs whose squared correlation coefficient with the index SNP is
+        # greater than the specified rsq_threshold:
+        snps_to_remove = snps[indices[np.where(r**2 > rsq_threshold)[0]]]
+
+        # Update the `keep_snps_dict` dictionary:
+        keep_snps_dict.update(dict(zip(snps_to_remove, np.zeros(len(snps_to_remove), dtype=bool))))
+
+    if extract:
+        return [snp for snp, cond in keep_snps_dict.items() if cond]
+    else:
+        return [snp for snp, cond in keep_snps_dict.items() if not cond]
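For example, a hypothetical clumping run on GWAS results (ld_mat is an LDMatrix for one chromosome and pvals a numpy array aligned with ld_mat.snps; both are assumed to exist):

from magenpy.stats.ld.utils import clump_snps

# Keep an approximately independent set of SNPs (r^2 < 0.1),
# preferring the SNP with the smaller p-value in each correlated pair:
independent_snps = clump_snps(ld_mat, statistic=pvals, rsq_threshold=0.1)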
+

compute_ld_plink1p9(genotype_matrix, ld_boundaries, output_dir, temp_dir='temp', overwrite=True, dtype='int16', compressor_name='lz4', compression_level=5)
Compute LD matrices using plink 1.9.

Parameters:

  • genotype_matrix (required): A plinkBEDGenotypeMatrix object.
  • ld_boundaries (required): An array of LD boundaries for every SNP.
  • output_dir (required): The output directory for the final LD matrix file (after processing).
  • temp_dir (default: 'temp'): A temporary directory to store intermediate files (e.g. files created for and by plink).
  • overwrite (default: True): If True, it overwrites any LD matrices in output_dir.
  • dtype (default: 'int16'): The data type for the entries of the LD matrix (supported data types are float32, float64 and integer quantized data types int8 and int16).
  • compressor_name (default: 'lz4'): The name of the compressor to use for the Zarr arrays.
  • compression_level (default: 5): The compression level to use for the Zarr arrays (1-9).

Source code in magenpy/stats/ld/utils.py:
def compute_ld_plink1p9(genotype_matrix,
+                        ld_boundaries,
+                        output_dir,
+                        temp_dir='temp',
+                        overwrite=True,
+                        dtype='int16',
+                        compressor_name='lz4',
+                        compression_level=5):
+
+    """
+    Compute LD matrices using plink 1.9.
+
+    :param genotype_matrix: A plinkBEDGenotypeMatrix object
+    :param ld_boundaries: An array of LD boundaries for every SNP
+    :param output_dir: The output directory for the final LD matrix file (after processing).
+    :param temp_dir: A temporary directory to store intermediate files (e.g. files created for and by plink).
+    :param overwrite: If True, it overwrites any LD matrices in `output_dir`.
+    :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+        and integer quantized data types int8 and int16).
+    :param compressor_name: The name of the compressor to use for the Zarr arrays.
+    :param compression_level: The compression level to use for the Zarr arrays (1-9).
+    """
+
+    from ...utils.executors import plink1Executor
+    from ...GenotypeMatrix import plinkBEDGenotypeMatrix
+
+    assert isinstance(genotype_matrix, plinkBEDGenotypeMatrix)
+
+    plink1 = plink1Executor()
+
+    keep_file = osp.join(temp_dir, 'samples.keep')
+    keep_table = genotype_matrix.sample_table.get_individual_table()
+    keep_table.to_csv(keep_file, index=False, header=False, sep="\t")
+
+    snp_keepfile = osp.join(temp_dir, 'variants.keep')
+    pd.DataFrame({'SNP': genotype_matrix.snps}).to_csv(
+        snp_keepfile, index=False, header=False
+    )
+
+    plink_output = osp.join(temp_dir, f'chr_{str(genotype_matrix.chromosome)}')
+
+    # Set the window sizes in various units:
+
+    # (1) Number of neighboring SNPs:
+    window_size = (ld_boundaries - np.arange(genotype_matrix.m)).max() + 10
+
+    # (2) Kilobases:
+    positional_bounds = np.clip(np.array([ld_boundaries[0, :] - 1, ld_boundaries[1, :]]),
+                                a_min=0, a_max=ld_boundaries.shape[1] - 1)
+
+    kb_pos = .001*genotype_matrix.bp_pos
+    kb_bounds = kb_pos[positional_bounds]
+    kb_window_size = (kb_bounds - kb_pos).max() + .01
+
+    # (3) centi Morgan:
+    try:
+        cm_pos = genotype_matrix.cm_pos
+        cm_bounds = genotype_matrix.cm_pos[positional_bounds]
+        cm_window_size = (cm_bounds - cm_pos).max() + .01
+    except Exception:
+        cm_window_size = None
+
+    cmd = [
+        f"--bfile {genotype_matrix.bed_file.replace('.bed', '')}",
+        f"--keep {keep_file}",
+        f"--extract {snp_keepfile}",
+        "--keep-allele-order",
+        f"--out {plink_output}",
+        "--r gz",
+        f"--ld-window {window_size}",
+        f"--ld-window-kb {kb_window_size}"
+    ]
+
+    if cm_window_size is not None:
+        cmd.append(f"--ld-window-cm {cm_window_size}")
+
+    # ---------------------------------------------------------
+    # Test if plink1.9 version is compatible with setting the --ld-window-r2 flag:
+    # This is important to account for due to differences in the behavior of plink1.9
+    # across different versions.
+    # See here for discussion of this behavior: https://github.com/shz9/viprs/issues/3
+
+    plink1.verbose = False
+
+    r2_flag_compatible = True
+
+    from subprocess import CalledProcessError
+
+    try:
+        plink1.execute(["--r gz", "--ld-window-r2 0"])
+    except CalledProcessError as e:
+        if "--ld-window-r2 flag cannot be used with --r" in e.stderr.decode():
+            r2_flag_compatible = False
+
+    if r2_flag_compatible:
+        cmd += ["--ld-window-r2 0"]
+
+    plink1.verbose = True
+
+    # ---------------------------------------------------------
+
+    plink1.execute(cmd)
+
+    # Convert from PLINK LD files to Zarr:
+    fin_ld_store = osp.join(output_dir, 'ld', 'chr_' + str(genotype_matrix.chromosome))
+
+    # Compute the pandas chunk_size
+    # The goal of this is to process chunks of the LD table without overwhelming memory resources:
+    avg_ncols = int((ld_boundaries[1, :] - ld_boundaries[0, :]).mean())
+    rows_per_chunk = estimate_rows_per_chunk(ld_boundaries.shape[1], avg_ncols, dtype=dtype)
+
+    if rows_per_chunk > 0.1*ld_boundaries.shape[1]:
+        pandas_chunksize = None
+    else:
+        pandas_chunksize = rows_per_chunk*avg_ncols // 2
+
+    return LDMatrix.from_plink_table(f"{plink_output}.ld.gz",
+                                     genotype_matrix.snps,
+                                     fin_ld_store,
+                                     pandas_chunksize=pandas_chunksize,
+                                     overwrite=overwrite,
+                                     dtype=dtype,
+                                     compressor_name=compressor_name,
+                                     compression_level=compression_level)
+

compute_ld_xarray(genotype_matrix, ld_boundaries, output_dir, temp_dir='temp', overwrite=True, delete_original=True, dtype='int16', compressor_name='lz4', compression_level=5)

Compute the Linkage Disequilibrium matrix or SNP-by-SNP correlation matrix assuming that the genotypes are represented by xarray or dask-like matrix objects. This function computes the entire X'X/N and stores the result on-disk in Zarr arrays. Then, we call the utilities from the LDMatrix class to sparsify the dense matrix according to the parameters specified by the ld_boundaries matrix.

NOTE: We don't recommend using this for large-scale genotype matrices. Use compute_ld_plink1p9 instead if you have plink installed on your system.

Parameters:

  • genotype_matrix (required): An xarrayGenotypeMatrix object.
  • ld_boundaries (required): An array of LD boundaries for every SNP.
  • output_dir (required): The output directory for the final LD matrix file.
  • temp_dir (default: 'temp'): A temporary directory where to store intermediate results.
  • overwrite (default: True): If True, overwrites LD matrices in temp_dir and output_dir, if they exist.
  • delete_original (default: True): If True, it deletes the original dense matrix after generating the sparse alternative.
  • dtype (default: 'int16'): The data type for the entries of the LD matrix (supported data types are float32, float64 and integer quantized data types int8 and int16).
  • compressor_name (default: 'lz4'): The name of the compressor to use for the Zarr arrays.
  • compression_level (default: 5): The compression level to use for the Zarr arrays (1-9).

Source code in magenpy/stats/ld/utils.py:
def compute_ld_xarray(genotype_matrix,
+                      ld_boundaries,
+                      output_dir,
+                      temp_dir='temp',
+                      overwrite=True,
+                      delete_original=True,
+                      dtype='int16',
+                      compressor_name='lz4',
+                      compression_level=5):
+
+    """
+    Compute the Linkage Disequilibrium matrix or snp-by-snp
+    correlation matrix assuming that the genotypes are represented
+    by `xarray` or `dask`-like matrix objects. This function computes the
+    entire X'X/N and stores the result on-disk in Zarr arrays. Then, we call the utilities
+    from the `LDMatrix` class to sparsify the dense matrix according to the parameters
+    specified by the `ld_boundaries` matrix.
+
+    NOTE: We don't recommend using this for large-scale genotype matrices.
+    Use `compute_ld_plink1p9` instead if you have plink installed on your system.
+
+    :param genotype_matrix: An `xarrayGenotypeMatrix` object
+    :param ld_boundaries: An array of LD boundaries for every SNP
+    :param output_dir: The output directory for the final LD matrix file.
+    :param temp_dir: A temporary directory where to store intermediate results.
+    :param overwrite: If True, overwrites LD matrices in `temp_dir` and `output_dir`, if they exist.
+    :param delete_original: If True, it deletes the original dense matrix after generating the sparse alternative.
+    :param dtype: The data type for the entries of the LD matrix (supported data types are float32, float64
+        and integer quantized data types int8 and int16).
+    :param compressor_name: The name of the compressor to use for the Zarr arrays.
+    :param compression_level: The compression level to use for the Zarr arrays (1-9).
+    """
+
+    from ...GenotypeMatrix import xarrayGenotypeMatrix
+
+    assert isinstance(genotype_matrix, xarrayGenotypeMatrix)
+
+    g_data = genotype_matrix.xr_mat
+
+    # Re-chunk the array to optimize computational speed and efficiency:
+    # New chunksizes:
+    new_chunksizes = (min(1024, g_data.shape[0]), min(1024, g_data.shape[1]))
+    g_data = g_data.chunk(dict(zip(g_data.dims, new_chunksizes)))
+
+    from ..transforms.genotype import standardize
+    import dask.array as da
+
+    # Standardize the genotype matrix and fill missing data with zeros:
+    g_mat = standardize(g_data).data
+
+    # Compute the full LD matrix and store to a temporary directory in the form of Zarr arrays:
+    import warnings
+
+    # Ignore performance-related warnings from Dask:
+    with warnings.catch_warnings():
+
+        if np.issubdtype(np.dtype(dtype), np.integer):
+            # If the requested data type is integer, we need to convert
+            # the data to `float32` to avoid overflow errors when computing the dot product:
+            dot_dtype = np.float32
+        else:
+            dot_dtype = dtype
+
+        warnings.simplefilter("ignore")
+        ld_mat = (da.dot(g_mat.T, g_mat) / genotype_matrix.sample_size).astype(dot_dtype)
+        ld_mat.to_zarr(temp_dir, overwrite=overwrite)
+
+    fin_ld_store = osp.join(output_dir, 'ld', 'chr_' + str(genotype_matrix.chromosome))
+
+    # Load the dense matrix and transform it to a sparse matrix using utilities implemented in the
+    # `LDMatrix` class:
+    return LDMatrix.from_dense_zarr_matrix(temp_dir,
+                                           ld_boundaries,
+                                           fin_ld_store,
+                                           overwrite=overwrite,
+                                           delete_original=delete_original,
+                                           dtype=dtype,
+                                           compressor_name=compressor_name,
+                                           compression_level=compression_level)
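For intuition, the core dense computation amounts to the following standalone sketch (synthetic data; the re-chunking and Zarr round-trip are omitted):

import numpy as np
import dask.array as da

rng = np.random.default_rng(0)
g = rng.integers(0, 3, size=(500, 200)).astype(np.float32)  # 500 samples x 200 SNPs
g = (g - g.mean(axis=0)) / g.std(axis=0)                    # standardize the columns

g_mat = da.from_array(g, chunks=(256, 128))

# Dense LD (correlation) matrix: X'X / N
ld_dense = (da.dot(g_mat.T, g_mat) / g_mat.shape[0]).compute()
print(ld_dense.shape)  # (200, 200), with ~1.0 on the diagonal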
+

delete_ld_store(ld_mat)
Delete the LD store from disk.

Parameters:

  • ld_mat (required): An LDMatrix object.

Source code in magenpy/stats/ld/utils.py:
def delete_ld_store(ld_mat):
+    """
+    Delete the LD store from disk.
+    :param ld_mat: An LDMatrix object
+    """
+
+    try:
+        ld_mat.store.rmdir()
+    except Exception as e:
+        print(e)
+

estimate_rows_per_chunk(rows, cols, dtype='int16', mem_size=128)

Estimate the number of rows per chunk for matrices conditional on the desired size of the chunk in MB. The estimator takes as input the number of rows, columns, data type, and projected size of the chunk in memory.

Parameters:

  • rows (required): Total number of rows in the matrix.
  • cols (required): Total number of columns. If sparse matrix with uneven columns, provide average column size.
  • dtype (default: 'int16'): The data type for the matrix entries.
  • mem_size (default: 128): Size of the chunk in memory (MB).

Source code in magenpy/stats/ld/utils.py:
def estimate_rows_per_chunk(rows, cols, dtype='int16', mem_size=128):
+    """
+    Estimate the number of rows per chunk for matrices conditional on the desired size of the chunk in MB.
+    The estimator takes as input the number of rows, columns, data type, and projected size of the chunk in memory.
+
+    :param rows: Total number of rows in the matrix.
+    :param cols: Total number of columns. If sparse matrix with uneven columns, provide average column size.
+    :param dtype: The data type for the matrix entries.
+    :param mem_size: Size of the chunk in memory (MB)
+    """
+
+    matrix_size = rows * cols * np.dtype(dtype).itemsize / 1024 ** 2
+    # Cast to int: floor division on floats would otherwise return a float:
+    n_chunks = max(1, int(matrix_size // mem_size))
+
+    return int(rows // n_chunks)
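As a quick sanity check of the arithmetic, consider a hypothetical 100,000 x 2,000 int16 matrix with the default 128 MB target:

import numpy as np

rows, cols = 100_000, 2_000
matrix_size = rows * cols * np.dtype('int16').itemsize / 1024 ** 2   # ~381.5 MB
n_chunks = max(1, int(matrix_size // 128))                           # -> 2 chunks
print(int(rows // n_chunks))                                         # -> 50000 rows per chunk

Note that the estimate is deliberately coarse: because of the floor division, each chunk here is roughly 190 MB, somewhat above the 128 MB target.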
+

expand_snps(seed_snps, ldm, rsq_threshold=0.9)

Given an initial set of SNPs, expand the set by adding "neighbors" whose squared correlation with the seed SNPs is higher than a user-specified threshold.

Parameters:

  • seed_snps (required): An iterable containing the initial set of SNP rsIDs.
  • ldm (required): An LDMatrix object containing SNP-by-SNP correlations.
  • rsq_threshold (default: 0.9): The r^2 threshold to use for including variants.

Source code in magenpy/stats/ld/utils.py:
def expand_snps(seed_snps, ldm, rsq_threshold=0.9):
+    """
+    Given an initial set of SNPs, expand the set by adding
+    "neighbors" whose squared correlation with the is higher than
+    a user-specified threshold.
+
+    :param seed_snps: An iterable containing initial set of SNP rsIDs.
+    :param ldm: An `LDMatrix` object containing SNP-by-SNP correlations.
+    :param rsq_threshold: The r^2 threshold to use for including variants.
+
+    """
+
+    ldm_snps = ldm.snps
+    # Indices (within the LD matrix) of the seed SNPs that are present in it:
+    snp_seed_idx = np.where(np.isin(ldm_snps, seed_snps))[0]
+
+    if len(snp_seed_idx) < 1:
+        print("Warning: None of the seed SNPs are present in the LD matrix object!")
+        return seed_snps
+
+    final_set = set(seed_snps)
+
+    for idx in snp_seed_idx:
+        r, indices = ldm.get_row(idx, return_indices=True)
+        final_set = final_set.union(set(ldm_snps[indices[np.where(r**2 > rsq_threshold)[0]]]))
+
+    return list(final_set)
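A hypothetical usage sketch (ld_mat is an LDMatrix and hits a list of significant SNP rsIDs; both are assumed to exist):

from magenpy.stats.ld.utils import expand_snps

# Add any variants in strong LD (r^2 > 0.8) with the initial hits:
expanded_snps = expand_snps(hits, ld_mat, rsq_threshold=0.8)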
+

move_ld_store(z_arr, target_path, overwrite=True)

Move an LD store from its current path to the target_path.

Parameters:

  • z_arr (required): An LDMatrix object.
  • target_path (required): The target path where to move the LD store.
  • overwrite (default: True): If True, overwrites the target path if it exists.

Source code in magenpy/stats/ld/utils.py:
def move_ld_store(z_arr, target_path, overwrite=True):
+    """
+    Move an LD store from its current path to the `target_path`
+    :param z_arr: An LDMatrix object
+    :param target_path: The target path where to move the LD store
+    :param overwrite: If True, overwrites the target path if it exists.
+    """
+
+    source_path = z_arr.store.dir_path()
+
+    # Guard against a non-existent target (os.scandir would raise FileNotFoundError):
+    if overwrite or not osp.isdir(target_path) or not any(os.scandir(target_path)):
+        import shutil
+        shutil.rmtree(target_path, ignore_errors=True)
+        shutil.move(source_path, target_path)
+
+    return zarr.open(target_path)
+

shrink_ld_matrix(ld_mat_obj, cm_pos, maf_var, genmap_ne, genmap_sample_size, shrinkage_cutoff=0.001, phased_haplotype=False, chunk_size=1000)

Shrink the entries of the LD matrix using the shrinkage estimator described in Lloyd-Jones (2019) and Wen and Stephens (2010). The estimator is also implemented in the RSS software by Xiang Zhu:

https://github.com/stephenslab/rss/blob/master/misc/get_corr.R

Parameters:

  • ld_mat_obj (required): An LDMatrix object encapsulating the LD matrix whose entries we wish to shrink.
  • cm_pos (required): The position of each variant in the LD matrix in centi Morgan.
  • maf_var (required): A vector of the variance in minor allele frequency (MAF) for each SNP in the LD matrix. Should be equivalent to 2*pj*(1 - pj), where pj is the MAF of SNP j.
  • genmap_ne (required): The effective population size for the genetic map.
  • genmap_sample_size (required): The sample size used to estimate the genetic map.
  • shrinkage_cutoff (default: 0.001): The cutoff value below which we assume that the shrinkage factor is zero.
  • phased_haplotype (default: False): A flag indicating whether the LD was calculated from phased haplotypes.
  • chunk_size (default: 1000): An optional parameter that sets the maximum number of rows processed simultaneously. The smaller the chunk_size, the lower the memory requirements for this step.

Source code in magenpy/stats/ld/utils.py:
def shrink_ld_matrix(ld_mat_obj,
+                     cm_pos,
+                     maf_var,
+                     genmap_ne,
+                     genmap_sample_size,
+                     shrinkage_cutoff=1e-3,
+                     phased_haplotype=False,
+                     chunk_size=1000):
+
+    """
+    Shrink the entries of the LD matrix using the shrinkage estimator
+    described in Lloyd-Jones (2019) and Wen and Stephens (2010). The estimator
+    is also implemented in the RSS software by Xiang Zhu:
+
+    https://github.com/stephenslab/rss/blob/master/misc/get_corr.R
+
+    :param ld_mat_obj: An `LDMatrix` object encapsulating the LD matrix whose entries we wish to shrink.
+    :param cm_pos: The position of each variant in the LD matrix in centi Morgan.
+    :param maf_var: A vector of the variance in minor allele frequency (MAF) for each SNP in the LD matrix. Should be
+    equivalent to 2*pj*(1. - pj), where pj is the MAF of SNP j.
+    :param genmap_ne: The effective population size for the genetic map.
+    :param genmap_sample_size: The sample size used to estimate the genetic map.
+    :param shrinkage_cutoff: The cutoff value below which we assume that the shrinkage factor is zero.
+    :param phased_haplotype: A flag indicating whether the LD was calculated from phased haplotypes.
+    :param chunk_size: An optional parameter that sets the maximum number of rows processed simultaneously. The smaller
+    the `chunk_size`, the lower the memory requirements for this step.
+    """
+
+    # The multiplicative term for the shrinkage factor
+    # The shrinkage factor is 4 * Ne * (rho_ij/100) / (2*m)
+    # where Ne is the effective population size and m is the sample size
+    # for the genetic map and rho_ij is the distance between SNPs i and j
+    # in centi Morgan.
+    # Therefore, the multiplicative term that we need to apply
+    # to the distance between SNPs is: 4*Ne/(200*m), which is equivalent to 0.02*Ne/m
+    # See also: https://github.com/stephenslab/rss/blob/master/misc/get_corr.R
+    # and Wen and Stephens (2010)
+
+    mult_term = .02*genmap_ne / genmap_sample_size
+
+    def harmonic_series_sum(n):
+        """
+        A utility function to compute the sum of the harmonic series
+        found in Equation 2.8 in Wen and Stephens (2010)
+        Acknowledgement: https://stackoverflow.com/a/27683292
+        """
+        from scipy.special import digamma
+        return digamma(n + 1) + np.euler_gamma
+
+    # Compute theta according to Eq. 2.8 in Wen and Stephens (2010)
+
+    h_sum = harmonic_series_sum(2*genmap_sample_size - 1)  # The sum of the harmonic series in Eq. 2.8
+    theta = (1. / h_sum) / (2. * genmap_sample_size + 1. / h_sum)  # The theta parameter (related to mutation rate)
+    theta_factor = (1. - theta)**2  # The theta factor that we'll multiply all elements of the covariance matrix with
+    theta_diag_factor = .5 * theta * (1. - .5 * theta)  # The theta factor for the diagonal elements
+
+    # Phased haplotype/unphased genotype multiplicative factor:
+    # Wen and Stephens (2010), Section 2.4
+    phased_mult = [.5, 1.][phased_haplotype]
+
+    # We need to turn the correlation matrix into a covariance matrix to
+    # apply the shrinkage factor. For this, we have to multiply each row
+    # by the product of standard deviations:
+    maf_sd = np.sqrt(phased_mult*maf_var)
+
+    # According to Eqs. 2.6 and 2.7 in Wen and Stephens (2010), the shrunk standard deviation should be:
+    shrunk_sd = np.sqrt(theta_factor*maf_var*phased_mult + theta_diag_factor)
+
+    global_indptr = ld_mat_obj.indptr
+
+    for chunk_idx in range(int(np.ceil(len(ld_mat_obj) / chunk_size))):
+
+        start_row = chunk_idx*chunk_size
+        end_row = min((chunk_idx+1)*chunk_size, len(ld_mat_obj))
+
+        # Load the subset of the LD matrix specified by chunk_size.
+        csr_mat = ld_mat_obj.load_rows(start_row=start_row, end_row=end_row, dtype=np.float32)
+
+        # Get the relevant portion of indices and pointers from the CSR matrix:
+        indptr = global_indptr[start_row:end_row+1]
+
+        row_indices = np.concatenate([
+            (start_row + r_idx)*np.ones(indptr[r_idx+1] - indptr[r_idx], dtype=int)
+            for r_idx in range(len(indptr) - 1)
+        ])
+
+        # Compute the shrinkage factor for entries in the current block:
+        shrink_factor = np.exp(-mult_term*np.abs(cm_pos[csr_mat.indices] - cm_pos[row_indices]))
+        # Set shrinkage factors below the cutoff value to 0.:
+        shrink_factor[shrink_factor < shrinkage_cutoff] = 0.
+        # Compute the theta multiplicative factor following Eq. 2.6 in Wen and Stephens (2010)
+        shrink_factor *= theta_factor
+
+        # The factor to convert the entries of the correlation matrix into corresponding covariances:
+        to_cov_factor = maf_sd[row_indices]*maf_sd[csr_mat.indices]
+
+        # Compute the new denominator for the Pearson correlation:
+        # The shrunk standard deviation of SNP j multiplied by the shrunk standard deviations of each neighbor:
+        shrunk_sd_prod = shrunk_sd[row_indices]*shrunk_sd[csr_mat.indices]
+
+        # Finally, compute the shrunk LD matrix entries:
+        csr_mat.data *= to_cov_factor*shrink_factor / shrunk_sd_prod
+
+        # Update the LD matrix object inplace:
+        ld_mat_obj.update_rows_inplace(csr_mat, start_row=start_row, end_row=end_row)
+
+    return ld_mat_obj
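To build intuition for the shrinkage weights, here is a minimal standalone sketch evaluating the exponential decay term described in the comments above (the genetic map parameters are hypothetical, illustrative values):

import numpy as np

# Illustrative genetic map parameters (hypothetical values):
genmap_ne, genmap_sample_size = 11_400, 183

mult_term = 0.02 * genmap_ne / genmap_sample_size   # the 4*Ne/(200*m) multiplier

# Shrinkage factor for a pair of SNPs 0.5 centi Morgan apart:
shrink = np.exp(-mult_term * 0.5)
print(round(shrink, 3))   # ~0.536: correlations decay with genetic distance

# With these settings, pairs more than ~5.5 cM apart fall below the
# default 0.001 cutoff and their correlations are set to zero.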
+
\ No newline at end of file

diff --git a/api/stats/score/utils/index.html b/api/stats/score/utils/index.html
new file mode 100644
index 0000000..1cc7ad2
--- /dev/null
+++ b/api/stats/score/utils/index.html
@@ -0,0 +1,1041 @@

Utils - magenpy

Utils

score_plink2(genotype_matrix, betas, standardize_genotype=False, temp_dir='temp')

Perform linear scoring using PLINK2. This function takes a genotype matrix object encapsulating and referencing plink BED files as well as a matrix of effect sizes (betas) and performs linear scoring of the form:

y = X * betas

This is useful for computing polygenic scores (PGS). The function supports a matrix of beta values, in which case the function returns a matrix of PGS values, one for each column of beta. For example, if there are 10 sets of betas, the function will compute 10 polygenic scores for each individual represented in the genotype matrix X.

Parameters:

  • genotype_matrix (required): An instance of plinkBEDGenotypeMatrix.
  • betas (required): A matrix of effect sizes (betas).
  • standardize_genotype (default: False): If True, standardize the genotype to have mean zero and unit variance before scoring.
  • temp_dir (default: 'temp'): The directory where the temporary files will be stored.

Returns:

    A numpy array of polygenic scores.

Source code in magenpy/stats/score/utils.py:
def score_plink2(genotype_matrix,
+                 betas,
+                 standardize_genotype=False,
+                 temp_dir='temp'):
+    """
+    Perform linear scoring using PLINK2.
+    This function takes a genotype matrix object encapsulating and referencing
+    plink BED files as well as a matrix of effect sizes (betas) and performs
+    linear scoring of the form:
+
+    y = X * betas
+
+    This is useful for computing polygenic scores (PGS). The function supports
+    a matrix of `beta` values, in which case the function returns a matrix of
+    PGS values, one for each column of `beta`. For example, if there are 10 sets
+    of betas, the function will compute 10 polygenic scores for each individual represented
+    in the genotype matrix `X`.
+
+    :param genotype_matrix: An instance of `plinkBEDGenotypeMatrix`.
+    :param betas: A matrix of effect sizes (betas).
+    :param standardize_genotype: If True, standardize the genotype to have mean zero and unit variance
+    before scoring.
+    :param temp_dir: The directory where the temporary files will be stored.
+
+    :return: A numpy array of polygenic scores.
+
+    """
+
+    from ...GenotypeMatrix import plinkBEDGenotypeMatrix
+    from ...utils.executors import plink2Executor
+
+    assert isinstance(genotype_matrix, plinkBEDGenotypeMatrix)
+
+    plink2 = plink2Executor()
+
+    try:
+        betas_shape = betas.shape[1]
+        if betas_shape == 1:
+            raise IndexError
+        score_col_nums = f"--score-col-nums 3-{3 + betas_shape - 1}"
+    except IndexError:
+        betas_shape = 1
+        betas = betas.reshape(-1, 1)
+        score_col_nums = "--score-col-nums 3"
+
+    # Create the samples file:
+
+    s_table = genotype_matrix.sample_table
+
+    keep_file = osp.join(temp_dir, 'samples.keep')
+    keep_table = s_table.get_individual_table()
+    keep_table.to_csv(keep_file, index=False, header=False, sep="\t")
+
+    eff_file = osp.join(temp_dir, 'variant_effect_size.txt')
+    df = genotype_matrix.get_snp_table(['SNP', 'A1'])
+
+    for i in range(betas_shape):
+        df['BETA' + str(i)] = betas[:, i]
+
+    # Remove any variants whose effect size is zero for all traits:
+    df = df.loc[df[['BETA' + str(i) for i in range(betas_shape)]].sum(axis=1) != 0]
+
+    # Standardize the genotype, if requested:
+    if standardize_genotype:
+        standardize_text = ' variance-standardize'
+    else:
+        standardize_text = ''
+
+    df.to_csv(eff_file, index=False, sep="\t")
+
+    output_file = osp.join(temp_dir, 'samples')
+
+    cmd = [
+        f"--bfile {genotype_matrix.bed_file}",
+        f"--keep {keep_file}",
+        f"--score {eff_file} 1 2 header-read cols=+scoresums{standardize_text}",
+        score_col_nums,
+        f"--out {output_file}",
+    ]
+
+    plink2.execute(cmd)
+
+    if not osp.isfile(output_file + '.sscore'):
+        raise FileNotFoundError(f"plink2 did not generate the expected output file: {output_file}.sscore")
+
+    dtypes = {'FID': str, 'IID': str}
+    for i in range(betas_shape):
+        dtypes.update({'PRS' + str(i): np.float64})
+
+    chr_pgs = pd.read_csv(output_file + '.sscore',
+                          sep=r'\s+',
+                          names=['FID', 'IID'] + ['PRS' + str(i) for i in range(betas_shape)],
+                          skiprows=1,
+                          usecols=[0, 1] + [4 + betas_shape + i for i in range(betas_shape)],
+                          dtype=dtypes)
+    chr_pgs = keep_table.astype({'FID': str, 'IID': str}).merge(chr_pgs)
+
+    pgs = chr_pgs[['PRS' + str(i) for i in range(betas_shape)]].values
+
+    if betas_shape == 1:
+        pgs = pgs.flatten()
+
+    return pgs
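A hypothetical usage sketch (g_mat is assumed to be a plinkBEDGenotypeMatrix for one chromosome, and betas an (M,) or (M, K) array aligned with g_mat.snps):

from magenpy.stats.score.utils import score_plink2

pgs = score_plink2(g_mat, betas, standardize_genotype=False, temp_dir='temp')

# A 1D `betas` yields a flat (N,) array of scores; an (M, K) matrix yields (N, K):
print(pgs.shape)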
+
\ No newline at end of file

diff --git a/api/stats/transforms/genotype/index.html b/api/stats/transforms/genotype/index.html
new file mode 100644
index 0000000..e78e0d7
--- /dev/null
+++ b/api/stats/transforms/genotype/index.html
@@ -0,0 +1,835 @@

Genotype - magenpy

Genotype

standardize(g_mat, fill_na=True)

Standardize the genotype matrix, such that the columns (i.e. SNPs) have zero mean and unit variance.

Parameters:

  • g_mat (required): A two-dimensional matrix (numpy, dask, xarray, etc.) where the rows are samples (individuals) and the columns are genetic variants.
  • fill_na (default: True): If true, fill the missing values with zero after standardizing.

Returns:

    The standardized genotype matrix.

Source code in magenpy/stats/transforms/genotype.py:
def standardize(g_mat, fill_na=True):
+    """
+    Standardize the genotype matrix, such that the columns (i.e. snps)
+    have zero mean and unit variance.
+    :param g_mat: A two-dimensional matrix (numpy, dask, xarray, etc.) where the rows are samples (individuals)
+    and the columns are genetic variants.
+    :param fill_na: If true, fill the missing values with zero after standardizing.
+
+    :return: The standardized genotype matrix.
+
+    """
+    sg_mat = (g_mat - g_mat.mean(axis=0)) / g_mat.std(axis=0)
+
+    if fill_na:
+        sg_mat = sg_mat.fillna(0.)
+
+    return sg_mat
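A small self-contained sketch of the same operation on an xarray DataArray with a missing genotype call (synthetic data):

import numpy as np
import xarray as xr

g = xr.DataArray(np.array([[0., 1., 2.],
                           [2., np.nan, 0.],
                           [1., 0., 1.]]),
                 dims=('sample', 'variant'))

sg = (g - g.mean(dim='sample')) / g.std(dim='sample')
sg = sg.fillna(0.)  # filling with 0 is mean-imputation on the standardized scale

print(sg.mean(dim='sample').values)  # columns now have (approximately) zero mean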
+
\ No newline at end of file

diff --git a/api/stats/transforms/phenotype/index.html b/api/stats/transforms/phenotype/index.html
new file mode 100644
index 0000000..dd43433
--- /dev/null
+++ b/api/stats/transforms/phenotype/index.html
@@ -0,0 +1,1418 @@

Phenotype - magenpy

Phenotype

adjust_for_covariates(phenotype, covariates)

This function takes a phenotype vector and a matrix of covariates and applies covariate correction on the phenotype. Concretely, this involves fitting a linear model where the response is the phenotype and the predictors are the covariates and then returning the residuals.

Parameters:

  • phenotype (required): A vector of continuous or quantitative phenotypes.
  • covariates (required): A matrix where each row corresponds to an individual and each column corresponds to a covariate (e.g. age, sex, PCs, etc.)

Returns:

    The residuals of the linear model fit.

Source code in magenpy/stats/transforms/phenotype.py:
def adjust_for_covariates(phenotype, covariates):
+    """
+    This function takes a phenotype vector and a matrix of covariates
+    and applies covariate correction on the phenotype. Concretely,
+    this involves fitting a linear model where the response is the
+    phenotype and the predictors are the covariates and then returning
+    the residuals.
+    :param phenotype: A vector of continuous or quantitative phenotypes.
+    :param covariates: A matrix where each row corresponds to an individual
+     and each column corresponds to a covariate (e.g. age, sex, PCs, etc.)
+
+     :return: The residuals of the linear model fit.
+    """
+
+    import statsmodels.api as sm
+
+    return sm.OLS(phenotype, sm.add_constant(covariates)).fit().resid
+

chained_transform(sample_table, adjust_covariates=False, standardize_phenotype=False, rint_phenotype=False, outlier_sigma_threshold=None, transform_order=('standardize', 'covariate_adjust', 'rint', 'outlier_removal'))
Apply a chain of transformations to the phenotype vector.

Parameters:

  • sample_table (required): An instance of SampleTable that contains phenotype information and other covariates about the samples in the dataset.
  • adjust_covariates (default: False): If true, regress out the covariates from the phenotype. By default, we regress out all the covariates present in the SampleTable.
  • standardize_phenotype (default: False): If true, standardize the phenotype.
  • rint_phenotype (default: False): If true, apply the Rank-based inverse normal transform.
  • outlier_sigma_threshold (default: None): The multiple of standard deviations or sigmas after which we consider the phenotypic value an outlier.
  • transform_order (default: ('standardize', 'covariate_adjust', 'rint', 'outlier_removal')): A tuple specifying the order in which to apply the transformations.

Returns:

    The transformed phenotype vector and a boolean mask indicating the samples that were not removed.

Source code in magenpy/stats/transforms/phenotype.py:
def chained_transform(sample_table,
+                      adjust_covariates=False,
+                      standardize_phenotype=False,
+                      rint_phenotype=False,
+                      outlier_sigma_threshold=None,
+                      transform_order=('standardize', 'covariate_adjust', 'rint', 'outlier_removal')):
+    """
+    Apply a chain of transformations to the phenotype vector.
+    :param sample_table: An instance of SampleTable that contains phenotype information and other
+    covariates about the samples in the dataset.
+    :param adjust_covariates: If true, regress out the covariates from the phenotype. By default, we regress out all
+    the covariates present in the SampleTable.
+    :param standardize_phenotype: If true, standardize the phenotype.
+    :param rint_phenotype: If true, apply Rank-based inverse normal transform.
+    :param outlier_sigma_threshold: The multiple of standard deviations or sigmas after
+    which we consider the phenotypic value an outlier.
+    :param transform_order: A tuple specifying the order in which to apply the transformations. By default,
+    the order is standardize, covariate_adjust, rint, and outlier_removal.
+
+    :return: The transformed phenotype vector and a boolean mask indicating the samples that were not removed.
+    """
+
+    phenotype = sample_table.phenotype
+    mask = np.ones_like(phenotype, dtype=bool)
+
+    if sample_table.phenotype_likelihood != 'binomial':
+        for transform in transform_order:
+
+            if transform == 'standardize':
+                # Standardize the phenotype:
+                if standardize_phenotype:
+                    phenotype = standardize(phenotype)
+
+            elif transform == 'covariate_adjust':
+                # Adjust the phenotype for a set of covariates:
+                if adjust_covariates:
+                    phenotype = adjust_for_covariates(phenotype, sample_table.get_covariates()[mask, :])
+
+            elif transform == 'rint':
+                # Apply Rank-based inverse normal transform (RINT) to the phenotype:
+                if rint_phenotype:
+                    phenotype = rint(phenotype)
+
+            elif transform == 'outlier_removal':
+                # Remove outlier samples whose phenotypes are more than `threshold` standard deviations from the mean:
+                if outlier_sigma_threshold is not None:
+                    # Find outliers:
+                    mask = detect_outliers(phenotype, outlier_sigma_threshold)
+                    # Filter phenotype vector:
+                    phenotype = phenotype[mask]
+
+    return phenotype, mask
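A hypothetical usage sketch (s_table is assumed to be a SampleTable with a quantitative phenotype and covariates loaded):

from magenpy.stats.transforms.phenotype import chained_transform

# Regress out covariates, rank-normalize, then drop samples beyond 5 sigmas:
phenotype, mask = chained_transform(s_table,
                                    adjust_covariates=True,
                                    rint_phenotype=True,
                                    outlier_sigma_threshold=5)

print(len(phenotype), mask.sum())  # the two counts should match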
+

detect_outliers(phenotype, sigma_threshold=5)

Detect samples with outlier phenotype values. This function takes a vector of quantitative phenotypes, computes the z-score for every individual, and returns a boolean vector indicating whether individual i has a phenotype value within the specified number of standard deviations, sigma_threshold.

Parameters:

- phenotype (required): A numpy vector of continuous or quantitative phenotypes.
- sigma_threshold (default: 5): The multiple of standard deviations (sigmas) beyond which a phenotypic value is considered an outlier.

Returns:

- A boolean array that is True for samples whose phenotype values fall within the threshold (i.e., non-outliers) and False for outliers.

Source code in magenpy/stats/transforms/phenotype.py
def detect_outliers(phenotype, sigma_threshold=5):
+    """
+    Detect samples with outlier phenotype values.
+    This function takes a vector of quantitative phenotypes,
+    computes the z-score for every individual, and returns a
+    boolean vector indicating whether individual i has phenotype value
+    within the specified standard deviations `sigma_threshold`.
+    :param phenotype: A numpy vector of continuous or quantitative phenotypes.
+    :param sigma_threshold: The multiple of standard deviations or sigmas after
+    which we consider the phenotypic value an outlier.
+
+    :return: A boolean array that is True for phenotype values within the threshold (i.e., non-outliers).
+    """
+    from scipy.stats import zscore
+    return np.abs(zscore(phenotype)) < sigma_threshold
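
A quick sanity check on simulated data (values are illustrative):

```python
import numpy as np

np.random.seed(0)
pheno = np.random.normal(size=1000)
pheno[0] = 25.  # inject one implausible phenotype value

keep = detect_outliers(pheno, sigma_threshold=5)
print(keep[0], keep[1:].all())  # False True -> only the injected outlier is flagged for removal
```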

rint(phenotype, offset=3.0 / 8)

Apply Rank-based inverse normal transform on the phenotype.

Parameters:

- phenotype (required): A vector of continuous or quantitative phenotypes.
- offset (default: 3.0 / 8): The offset to use in the INT transformation (Blom's offset by default).

Returns:

- The RINT-transformed phenotype.

Source code in magenpy/stats/transforms/phenotype.py
def rint(phenotype, offset=3./8):
+    """
+    Apply Rank-based inverse normal transform on the phenotype.
+    :param phenotype: A vector of continuous or quantitative phenotypes.
+    :param offset: The offset to use in the INT transformation (Blom's offset by default).
+
+    :return: The RINT-transformed phenotype.
+    """
+
+    from scipy.stats import rankdata, norm
+
+    ranked_pheno = rankdata(phenotype, method="average")
+    return norm.ppf((ranked_pheno - offset) / (len(ranked_pheno) - 2 * offset + 1))
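
A brief illustration of the transform's effect (simulated skewed data):

```python
import numpy as np

np.random.seed(0)
skewed = np.random.exponential(scale=2., size=5000)
transformed = rint(skewed)

# The transform maps ranks onto normal quantiles, so the output is
# centered and symmetric regardless of the skew of the input:
print(round(abs(transformed.mean()), 3))                     # 0.0
print(round(abs(transformed.max() + transformed.min()), 3))  # 0.0 (exact symmetry)
```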

standardize(phenotype)

Standardize the phenotype vector to have mean zero and unit variance.

Parameters:

- phenotype (required): A numpy vector of continuous or quantitative phenotypes.

Returns:

- The standardized phenotype array.

Source code in magenpy/stats/transforms/phenotype.py
def standardize(phenotype):
+    """
+    Standardize the phenotype vector to have mean zero and unit variance
+    :param phenotype: A numpy vector of continuous or quantitative phenotypes.
+
+    :return: The standardized phenotype array.
+    """
+    return (phenotype - phenotype.mean()) / phenotype.std()

diff --git a/api/stats/variant/utils/index.html b/api/stats/variant/utils/index.html

Utils

compute_allele_frequency_plink2(genotype_matrix, temp_dir='temp')

Compute the allele frequency for each SNP in the genotype matrix using PLINK2.

Parameters:

- genotype_matrix (required): A GenotypeMatrix object (must be a plinkBEDGenotypeMatrix).
- temp_dir (default: 'temp'): The temporary directory in which to store intermediate files.

Returns:

- A numpy array of allele frequencies.

Source code in magenpy/stats/variant/utils.py
def compute_allele_frequency_plink2(genotype_matrix, temp_dir='temp'):
+    """
+    Compute the allele frequency for each SNP in the genotype matrix using PLINK2.
+    :param genotype_matrix: A GenotypeMatrix object.
+    :param temp_dir: The temporary directory where to store intermediate files.
+
+    :return: A numpy array of allele frequencies.
+
+    """
+
+    assert isinstance(genotype_matrix, plinkBEDGenotypeMatrix)
+
+    plink2 = plink2Executor()
+
+    s_table = genotype_matrix.sample_table
+
+    keep_file = osp.join(temp_dir, 'samples.keep')
+    keep_table = s_table.get_individual_table()
+    keep_table.to_csv(keep_file, index=False, header=False, sep="\t")
+
+    snp_keepfile = osp.join(temp_dir, "variants.keep")
+    pd.DataFrame({'SNP': genotype_matrix.snps}).to_csv(
+        snp_keepfile, index=False, header=False
+    )
+
+    plink_output = osp.join(temp_dir, "variants")
+
+    cmd = [
+        f"--bfile {genotype_matrix.bed_file}",
+        f"--keep {keep_file}",
+        f"--extract {snp_keepfile}",
+        "--freq",
+        f"--out {plink_output}",
+    ]
+
+    plink2.execute(cmd)
+
+    freq_df = pd.read_csv(plink_output + ".afreq", sep=r'\s+')
+    freq_df.rename(columns={'ID': 'SNP',
+                            'REF': 'A2',
+                            'ALT': 'A1', 'ALT1': 'A1',
+                            'ALT_FREQS': 'MAF', 'ALT1_FREQ': 'MAF'}, inplace=True)
+    merged_df = merge_snp_tables(genotype_matrix.get_snp_table(['SNP', 'A1', 'A2']), freq_df)
+
+    if len(merged_df) != genotype_matrix.n_snps:
+        raise ValueError("Length of allele frequency table does not match number of SNPs.")
+
+    return merged_df['MAF'].values
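
A hedged usage sketch on the genotype data bundled with magenpy (assumes plink2 is installed and its path is configured in magenpy's options; the GWADataLoader interface follows the package tutorials). The companion `compute_sample_size_per_snp_plink2` below follows the exact same pattern, with `--missing variant-only` in place of `--freq`:

```python
import os
import magenpy as mgp

os.makedirs('temp', exist_ok=True)  # the temp_dir must exist beforehand

gdl = mgp.GWADataLoader(bed_files=mgp.tgp_eur_data_path())
gmat = gdl.genotype[22]  # plinkBEDGenotypeMatrix for chromosome 22 (key assumed)

maf = compute_allele_frequency_plink2(gmat, temp_dir='temp')
print(maf.shape)  # one frequency per SNP
```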

compute_sample_size_per_snp_plink2(genotype_matrix, temp_dir='temp')

Compute the sample size per SNP in the genotype matrix using PLINK2.

Parameters:

- genotype_matrix (required): A GenotypeMatrix object (must be a plinkBEDGenotypeMatrix).
- temp_dir (default: 'temp'): The temporary directory in which to store intermediate files.

Returns:

- A numpy array of sample sizes per SNP.

Source code in magenpy/stats/variant/utils.py
def compute_sample_size_per_snp_plink2(genotype_matrix, temp_dir='temp'):
+    """
+    Compute the sample size per SNP in the genotype matrix using PLINK2.
+    :param genotype_matrix: A GenotypeMatrix object.
+    :param temp_dir: The temporary directory where to store intermediate files.
+
+    :return: A numpy array of sample sizes per SNP.
+    """
+
+    assert isinstance(genotype_matrix, plinkBEDGenotypeMatrix)
+
+    plink2 = plink2Executor()
+
+    s_table = genotype_matrix.sample_table
+
+    keep_file = osp.join(temp_dir, 'samples.keep')
+    keep_table = s_table.get_individual_table()
+    keep_table.to_csv(keep_file, index=False, header=False, sep="\t")
+
+    snp_keepfile = osp.join(temp_dir, "variants.keep")
+    pd.DataFrame({'SNP': genotype_matrix.snps}).to_csv(
+        snp_keepfile, index=False, header=False
+    )
+
+    plink_output = osp.join(temp_dir, "variants")
+
+    cmd = [
+        f"--bfile {genotype_matrix.bed_file}",
+        f"--keep {keep_file}",
+        f"--extract {snp_keepfile}",
+        "--missing variant-only",
+        f"--out {plink_output}",
+    ]
+
+    plink2.execute(cmd)
+
+    miss_df = pd.read_csv(plink_output + ".vmiss", sep=r'\s+')
+    miss_df = pd.DataFrame({'ID': genotype_matrix.snps}).merge(miss_df)
+
+    if len(miss_df) != genotype_matrix.n_snps:
+        raise ValueError("Length of missingness table does not match number of SNPs.")
+
+    return (miss_df['OBS_CT'] - miss_df['MISSING_CT']).values

diff --git a/api/utils/compute_utils/index.html b/api/utils/compute_utils/index.html

Compute utils

generate_slice_dictionary(vec)

This utility function takes a sorted vector (e.g. a numpy array), identifies the unique elements, and generates a dictionary of slices delineating the start and end positions of each element in the vector.

Parameters:

- vec (required): A numpy array.

Source code in magenpy/utils/compute_utils.py
def generate_slice_dictionary(vec):
+    """
+    This utility function takes a sorted vector (e.g. numpy array),
+    identifies the unique elements and generates a dictionary of slices
+    delineating the start and end positions of each element in the vector.
+
+    :param vec: A numpy array
+    """
+
+    vals, idx = np.unique(vec, return_index=True)
+    idx_sort = np.argsort(idx)
+
+    vals = vals[idx_sort]
+    idx = idx[idx_sort]
+
+    d = {}
+
+    for i in range(len(idx)):
+        try:
+            d[vals[i]] = slice(idx[i], idx[i + 1])
+        except IndexError:
+            d[vals[i]] = slice(idx[i], len(vec))
+
+    return d
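
A worked example on a grouped vector (e.g. per-variant chromosome labels):

```python
import numpy as np

chroms = np.array([21, 21, 21, 22, 22])
slices = generate_slice_dictionary(chroms)

print(slices[21])          # slice(0, 3, None)
print(chroms[slices[22]])  # [22 22]
```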

intersect_arrays(arr1, arr2, return_index=False)

This utility function takes two arrays and returns the shared elements (intersection) between them. If return_index is set to True, it returns the indices of the shared elements in the first array.

Parameters:

- arr1 (required): The first array.
- arr2 (required): The second array.
- return_index (default: False): If True, return the indices of the shared elements in the first array.

Source code in magenpy/utils/compute_utils.py
def intersect_arrays(arr1, arr2, return_index=False):
+    """
+    This utility function takes two arrays and returns the shared
+    elements (intersection) between them. If return_index is set to True,
+    it returns the index of shared elements in the first array.
+
+    :param arr1: The first array
+    :param arr2: The second array
+    :param return_index: Return the index of shared elements in the first array
+    """
+
+    # NOTE: For best and consistent results, we cast all data types to `str`
+    # for now. May need a smarter solution in the future.
+    common_elements = pd.DataFrame({'ID': arr1}, dtype=str).reset_index().merge(
+        pd.DataFrame({'ID': arr2}, dtype=str)
+    )
+
+    if return_index:
+        return common_elements['index'].values
+    else:
+        return common_elements['ID'].values
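
Example (IDs are illustrative):

```python
import numpy as np

a = np.array(['rs12', 'rs34', 'rs56'])
b = np.array(['rs56', 'rs12'])

print(intersect_arrays(a, b))                     # ['rs12' 'rs56']
print(intersect_arrays(a, b, return_index=True))  # [0 2]
```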

iterable(arg)

Check if an object is iterable (but not a string).

Parameters:

- arg (required): A Python object.

Returns:

- True if the object is iterable, False otherwise.

Source code in magenpy/utils/compute_utils.py
def iterable(arg):
+    """
+    Check if an object is iterable (but not a string).
+    :param arg: A python object.
+    :return: True if the object is iterable, False otherwise.
+    """
+
+    import collections.abc
+
+    return (
+        isinstance(arg, collections.abc.Iterable)
+        and not isinstance(arg, str)
+    )
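
Quick checks (strings are deliberately excluded even though Python treats them as iterable):

```python
print(iterable([1, 2, 3]))  # True
print(iterable('abc'))      # False
print(iterable(42))         # False
```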

diff --git a/api/utils/data_utils/index.html b/api/utils/data_utils/index.html

Data utils

tgp_eur_data_path()

Return the path of the attached 1000G genotype data for European samples (N=378), restricted to a subset of chromosome 22 (p=15938 variants).

Source code in magenpy/utils/data_utils.py
def tgp_eur_data_path():
+    """
+    Return the path of the attached 1000G genotype data for
+    European samples (N=378) and a subset of chromosome 22 (p=15938)
+    """
+    return osp.join(osp.dirname(osp.dirname(__file__)), 'data/1000G_eur_chr22')

ukb_height_sumstats_path()

Return the path of the attached GWAS summary statistics file for standing height. The file contains summary statistics for HapMap3 variants on CHR22 and is a snapshot of the summary statistics published in the fastGWA database: https://yanglab.westlake.edu.cn/data/fastgwa_data/UKB/50.v1.1.fastGWA.gz

Source code in magenpy/utils/data_utils.py
def ukb_height_sumstats_path():
+    """
+    Return the path of the attached GWAS summary statistics file
+    for standing height. The file contains summary statistics for
+    HapMap3 variants on CHR22 and is a snapshot of the summary statistics
+    published on the fastGWA database:
+    https://yanglab.westlake.edu.cn/data/fastgwa_data/UKB/50.v1.1.fastGWA.gz
+    """
+    return osp.join(osp.dirname(osp.dirname(__file__)), 'data/ukb_height_chr22.fastGWA.gz')
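
A hedged sketch of how these helpers are typically used together, loading the bundled genotypes and height summary statistics into a GWADataLoader (interface and format name as shown in the package tutorials):

```python
import magenpy as mgp

gdl = mgp.GWADataLoader(bed_files=mgp.tgp_eur_data_path(),
                        sumstats_files=mgp.ukb_height_sumstats_path(),
                        sumstats_format='fastGWA')
```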

diff --git a/api/utils/executors/index.html b/api/utils/executors/index.html

Executors

plink1Executor

Bases: object

A wrapper class for interfacing with the plink1.9 command line tool.

Source code in magenpy/utils/executors.py
class plink1Executor(object):
+    """
+    A wrapper class for interfacing with the `plink1.9` command line tool.
+    """
+
+    def __init__(self, threads='auto', verbose=True):
+        """
+        Initialize the plink1.9 executor
+        :param threads: The number of threads to use for computations. If set to 'auto', the number of
+        available CPUs will be used.
+        :type threads: int or str
+        :param verbose: Whether to print the output of the command
+        :type verbose: bool
+        """
+
+        if threads == 'auto':
+            self.threads = available_cpu()
+        else:
+            self.threads = threads
+
+        self.plink1_path = mgp.get_option('plink1.9_path')
+
+        if not is_cmd_tool(self.plink1_path):
+            raise Exception(f"Did not find the executable for plink at: {self.plink1_path}")
+
+        self.verbose = verbose
+
+    def execute(self, cmd):
+        """
+        Execute a plink command
+        :param cmd: The flags to pass to plink. For example, ['--bfile', 'file', '--out', 'output']
+        :type cmd: list of strings
+        """
+
+        cmd = [self.plink1_path] + cmd + [f'--threads {self.threads}']
+
+        from subprocess import CalledProcessError
+
+        try:
+            run_shell_script(" ".join(cmd))
+        except CalledProcessError as e:
+            if self.verbose:
+                print("Invocation of plink returned the following error message:")
+                print(e.stderr.decode())
+            raise e

__init__(threads='auto', verbose=True)

Initialize the plink1.9 executor.

Parameters:

- threads (int or str, default: 'auto'): The number of threads to use for computations. If set to 'auto', the number of available CPUs will be used.
- verbose (bool, default: True): Whether to print the output of the command.

Source code in magenpy/utils/executors.py
def __init__(self, threads='auto', verbose=True):
+    """
+    Initialize the plink1.9 executor
+    :param threads: The number of threads to use for computations. If set to 'auto', the number of
+    available CPUs will be used.
+    :type threads: int or str
+    :param verbose: Whether to print the output of the command
+    :type verbose: bool
+    """
+
+    if threads == 'auto':
+        self.threads = available_cpu()
+    else:
+        self.threads = threads
+
+    self.plink1_path = mgp.get_option('plink1.9_path')
+
+    if not is_cmd_tool(self.plink1_path):
+        raise Exception(f"Did not find the executable for plink at: {self.plink1_path}")
+
+    self.verbose = verbose

execute(cmd)

Execute a plink command.

Parameters:

- cmd (list of strings, required): The flags to pass to plink. For example, ['--bfile', 'file', '--out', 'output'].

Source code in magenpy/utils/executors.py
def execute(self, cmd):
+    """
+    Execute a plink command
+    :param cmd: The flags to pass to plink. For example, ['--bfile', 'file', '--out', 'output']
+    :type cmd: list of strings
+    """
+
+    cmd = [self.plink1_path] + cmd + [f'--threads {self.threads}']
+
+    from subprocess import CalledProcessError
+
+    try:
+        run_shell_script(" ".join(cmd))
+    except CalledProcessError as e:
+        if self.verbose:
+            print("Invocation of plink returned the following error message:")
+            print(e.stderr.decode())
+        raise e

plink2Executor

Bases: object

A wrapper class for interfacing with the plink2 command line tool.

Source code in magenpy/utils/executors.py
class plink2Executor(object):
+    """
+    A wrapper class for interfacing with the `plink2` command line tool.
+    """
+
+    def __init__(self, threads='auto', verbose=True):
+        """
+        Initialize the plink2 executor
+        :param threads: The number of threads to use for computations. If set to 'auto', the number of
+        available CPUs will be used.
+        :type threads: int or str
+        :param verbose: Whether to print the output of the command
+        :type verbose: bool
+        """
+
+        if threads == 'auto':
+            self.threads = available_cpu()
+        else:
+            self.threads = threads
+
+        self.plink2_path = mgp.get_option('plink2_path')
+
+        if not is_cmd_tool(self.plink2_path):
+            raise Exception(f"Did not find the executable for plink2 at: {self.plink2_path}")
+
+        self.verbose = verbose
+
+    def execute(self, cmd):
+        """
+        Execute a `plink2` command
+        :param cmd: The flags to pass to plink2. For example, ['--bfile', 'file', '--out', 'output']
+        :type cmd: list of strings
+        """
+
+        cmd = [self.plink2_path] + cmd + [f'--threads {self.threads}']
+
+        from subprocess import CalledProcessError
+
+        try:
+            run_shell_script(" ".join(cmd))
+        except CalledProcessError as e:
+
+            if self.verbose:
+                print("Invocation of plink2 returned the following error message:")
+                print(e.stderr.decode())
+
+            raise e

__init__(threads='auto', verbose=True)

Initialize the plink2 executor.

Parameters:

- threads (int or str, default: 'auto'): The number of threads to use for computations. If set to 'auto', the number of available CPUs will be used.
- verbose (bool, default: True): Whether to print the output of the command.

Source code in magenpy/utils/executors.py
def __init__(self, threads='auto', verbose=True):
+    """
+    Initialize the plink2 executor
+    :param threads: The number of threads to use for computations. If set to 'auto', the number of
+    available CPUs will be used.
+    :type threads: int or str
+    :param verbose: Whether to print the output of the command
+    :type verbose: bool
+    """
+
+    if threads == 'auto':
+        self.threads = available_cpu()
+    else:
+        self.threads = threads
+
+    self.plink2_path = mgp.get_option('plink2_path')
+
+    if not is_cmd_tool(self.plink2_path):
+        raise Exception(f"Did not find the executable for plink2 at: {self.plink2_path}")
+
+    self.verbose = verbose

execute(cmd)

Execute a plink2 command.

Parameters:

- cmd (list of strings, required): The flags to pass to plink2. For example, ['--bfile', 'file', '--out', 'output'].

Source code in magenpy/utils/executors.py
def execute(self, cmd):
+    """
+    Execute a `plink2` command
+    :param cmd: The flags to pass to plink2. For example, ['--bfile', 'file', '--out', 'output']
+    :type cmd: list of strings
+    """
+
+    cmd = [self.plink2_path] + cmd + [f'--threads {self.threads}']
+
+    from subprocess import CalledProcessError
+
+    try:
+        run_shell_script(" ".join(cmd))
+    except CalledProcessError as e:
+
+        if self.verbose:
+            print("Invocation of plink2 returned the following error message:")
+            print(e.stderr.decode())
+
+        raise e
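
A hedged usage sketch (assumes plink2 is installed and its path is set in magenpy's configuration; file names are illustrative). Note that `execute` joins the flags with spaces, so each list element may carry its own arguments:

```python
plink2 = plink2Executor(threads=4)

plink2.execute([
    "--bfile 1000G_eur_chr22",  # genotype file prefix (illustrative)
    "--freq",
    "--out chr22_freq",
])
```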

diff --git a/api/utils/model_utils/index.html b/api/utils/model_utils/index.html

Model utils

dequantize(ints, float_dtype=np.float32)

Dequantize integers to the specified floating point type. NOTE: Assumes the original floats are in the range [-1, 1].

Parameters:

- ints (required): A numpy array of integers.
- float_dtype (default: np.float32): The floating point type to dequantize to.

Source code in magenpy/utils/model_utils.py
def dequantize(ints, float_dtype=np.float32):
+    """
+    Dequantize integers to the specified floating point type.
+    NOTE: Assumes original floats are in the range [-1, 1].
+    :param ints: A numpy array of integers
+    :param float_dtype: The floating point type to dequantize to.
+    """
+
+    # Infer the boundaries from the integer type
+    info = np.iinfo(ints.dtype)
+
+    # Compute the scale and zero point
+    # NOTE: We add 1 to the info.min here to force the zero point to be exactly at 0.
+    # See discussions on Scale Quantization Mapping.
+    scale = 2. / (info.max - (info.min + 1))
+
+    return ints.astype(float_dtype) * scale

get_shared_distance_matrix(tree, tips=None)

This function takes a Biopython tree and returns the shared distance matrix, i.e., for each pair of clades or populations, the distance from the root of the tree to their most recent common ancestor (MRCA). Diagonal entries are the distances from the root to each tip.

Source code in magenpy/utils/model_utils.py
def get_shared_distance_matrix(tree, tips=None):
+    """
+    This function takes a Biopython tree and returns the
+    shared distance matrix, i.e. for each pair of clades or populations,
+    the distance from the root of the tree to their most recent
+    common ancestor (MRCA).
+    """
+
+    tips = tree.get_terminals() if tips is None else tips
+    n_tips = len(tips)  # Number of terminal species
+    sdist_matrix = np.zeros((n_tips, n_tips))  # Shared distance matrix
+
+    for i in range(n_tips):
+        for j in range(i, n_tips):
+            if i == j:
+                sdist_matrix[i, j] = tree.distance(tree.root, tips[i])
+            else:
+                mrca = tree.common_ancestor(tips[i], tips[j])
+                sdist_matrix[i, j] = sdist_matrix[j, i] = tree.distance(tree.root, mrca)
+
+    return sdist_matrix

identify_mismatched_snps(gdl, chrom=None, n_iter=10, G=100, p_dentist_threshold=5e-08, p_gwas_threshold=0.01, rsq_threshold=0.95, max_removed_per_iter=0.005)

This function implements a simple quality control procedure that checks that the GWAS summary statistics (Z-scores) are consistent with the LD reference panel. This is done using a simplified version of the framework outlined in the DENTIST paper:

Improved analyses of GWAS summary statistics by reducing data heterogeneity and errors. Chen et al. 2021.

Compared to DENTIST, the simplifications we make are:

- For each SNP, we sample one neighboring SNP at a time and compute the T statistic using that neighbor's information. The benefit of this is that we don't need to invert any matrices, so it's a fast operation to run.
- To arrive at a more robust estimate, we sample up to k neighbors and average the T-statistic across those k neighbors.

NOTE: May need to re-implement this to apply some of the constraints genome-wide rather than on a per-chromosome basis.

Parameters:

- gdl (required): A GWADataLoader object.
- chrom (default: None): Perform checking only on chromosome chrom.
- n_iter (default: 10): Number of iterations.
- G (default: 100): The number of neighboring SNPs to sample.
- p_dentist_threshold (default: 5e-8): The Bonferroni-corrected P-value threshold.
- p_gwas_threshold (default: 1e-2): The nominal GWAS P-value threshold for partitioning variants.
- rsq_threshold (default: 0.95): The R^2 threshold for selecting neighbors (a neighbor's squared correlation coefficient must be below this threshold).
- max_removed_per_iter (default: 0.005): The maximum proportion of variants removed in each iteration.

Source code in magenpy/utils/model_utils.py
def identify_mismatched_snps(gdl,
+                             chrom=None,
+                             n_iter=10,
+                             G=100,
+                             p_dentist_threshold=5e-8,
+                             p_gwas_threshold=1e-2,
+                             rsq_threshold=.95,
+                             max_removed_per_iter=.005):
+    """
+    This function implements a simple quality control procedures
+    that checks that the GWAS summary statistics (Z-scores)
+    are consistent with the LD reference panel. This is done
+    using a simplified version of the framework outlined in the DENTIST paper:
+
+    Improved analyses of GWAS summary statistics by reducing data heterogeneity and errors
+    Chen et al. 2021
+
+    Compared to DENTIST, the simplifications we make are:
+        -   For each SNP, we sample one neighboring SNP at a time and compute the T statistic
+            using that neighbor's information. The benefit of this is that we don't need to
+            invert any matrices, so it's a fast operation to run.
+        -   To arrive at a more robust estimate, we sample up to `k` neighbors and average
+            the T-statistic across those `k` neighbors.
+
+    NOTE: May need to re-implement this to apply some of the constraints genome-wide
+    rather than on a per-chromosome basis.
+
+    :param gdl: A `GWADataLoader` object
+    :param chrom: Perform checking only on chromosome `chrom`
+    :param n_iter: Number of iterations
+    :param G: The number of neighboring SNPs to sample (default: 100)
+    :param p_dentist_threshold: The Bonferroni-corrected P-value threshold (default: 5e-8)
+    :param p_gwas_threshold: The nominal GWAS P-value threshold for partitioning variants (default: 1e-2)
+    :param rsq_threshold: The R^2 threshold to select neighbors (neighbor's squared
+    correlation coefficient must be less than specified threshold).
+    :param max_removed_per_iter: The maximum proportion of variants removed in each iteration
+    """
+
+    if chrom is None:
+        chromosomes = gdl.chromosomes
+    else:
+        chromosomes = [chrom]
+
+    shapes = gdl.shapes
+    mismatched_dict = {c: np.repeat(False, gdl.shapes[c])
+                       for c in chromosomes}
+
+    p_gwas_above_thres = {c: gdl.sumstats_table[c].p_value > p_gwas_threshold for c in chromosomes}
+    gwas_thres_size = {c: p.sum() for c, p in p_gwas_above_thres.items()}
+    converged = {c: False for c in chromosomes}
+
+    for j in tqdm(range(n_iter),
+                  total=n_iter,
+                  desc="Identifying mismatched SNPs..."):
+
+        for chrom in chromosomes:
+
+            if converged[chrom]:
+                continue
+
+            ld_bounds = gdl.ld[chrom].get_masked_boundaries()
+            z = gdl.z_scores[chrom]  # Obtain the z-scores
+            t = np.zeros_like(z)
+
+            # Loop over the LD matrix:
+            for i, r in enumerate(gdl.ld[chrom]):
+
+                # If the number of neighbors is less than 10, skip...
+                if mismatched_dict[chrom][i] or len(r) < 10:
+                    continue
+
+                start_idx = ld_bounds[0, i]
+                # Select neighbors randomly
+                # Note: We are excluding neighbors whose squared correlation coefficient
+                # is greater than pre-specified threshold:
+                p = (np.array(r)**2 < rsq_threshold).astype(float)
+                p /= p.sum()
+
+                neighbor_idx = np.random.choice(len(r), p=p, size=G)
+                neighbor_r = np.array(r)[neighbor_idx]
+
+                # Predict the z-score of snp i, given the z-scores of its neighbors:
+                pred_z = neighbor_r*z[start_idx + neighbor_idx]
+
+                # Compute the Td statistic for each neighbor and average:
+                t[i] = ((z[i] - pred_z) ** 2 / (1. - neighbor_r**2)).mean()
+
+            # Compute the DENTIST p-value assuming a Chi-Square distribution with 1 dof.
+            dentist_pval = 1. - stats.chi2.cdf(t, 1)
+            # Use a Bonferroni correction to select mismatched SNPs:
+            mismatched_snps = dentist_pval < p_dentist_threshold
+
+            if mismatched_snps.sum() < 1:
+                # If no new mismatched SNPs are identified, stop iterating...
+                converged[chrom] = True
+            elif j == n_iter - 1:
+                # If this is the last iteration, take all identified SNPs
+                mismatched_dict[chrom] = (mismatched_dict[chrom] | mismatched_snps)
+            else:
+
+                # Otherwise, we will perform the iterative filtering procedure
+                # by splitting variants based on their GWAS p-values:
+
+                # (1) Group S1: SNPs to remove from P_GWAS > threshold:
+                mismatch_above_thres = mismatched_snps & p_gwas_above_thres[chrom]
+                n_mismatch_above_thres = mismatch_above_thres.sum()
+                prop_mismatch_above_thres = n_mismatch_above_thres / gwas_thres_size[chrom]
+
+                if n_mismatch_above_thres < 1:
+                    # If no mismatches are detected above the threshold, filter
+                    # the mismatches below the threshold and continue...
+                    mismatched_dict[chrom] = (mismatched_dict[chrom] | mismatched_snps)
+                    continue
+
+                # Sort the DENTIST p-values by index:
+                sort_d_pval_idx = np.argsort(dentist_pval)
+
+                if prop_mismatch_above_thres > max_removed_per_iter:
+                    idx_to_keep = sort_d_pval_idx[mismatch_above_thres][
+                                  int(gwas_thres_size[chrom]*max_removed_per_iter):]
+                    mismatch_above_thres[idx_to_keep] = False
+
+                # (2) Group S2: SNPs to remove from P_GWAS < threshold
+
+                # Find mismatched variants below the threshold:
+                mismatch_below_thres = mismatched_snps & (~p_gwas_above_thres[chrom])
+                n_mismatch_below_thres = mismatch_below_thres.sum()
+                prop_mismatch_below_thres = n_mismatch_below_thres / (shapes[chrom] - gwas_thres_size[chrom])
+
+                # For the mismatched variants below the threshold,
+                # we remove the same proportion as the variants above the threshold:
+                prop_keep_below_thres = min(max_removed_per_iter, prop_mismatch_above_thres)
+
+                if prop_mismatch_below_thres > prop_keep_below_thres:
+                    idx_to_keep = sort_d_pval_idx[mismatch_below_thres][
+                                  int((shapes[chrom] - gwas_thres_size[chrom]) * prop_keep_below_thres):
+                                  ]
+                    mismatch_below_thres[idx_to_keep] = False
+
+                # Update the number of variants above the threshold:
+                gwas_thres_size[chrom] -= mismatch_above_thres.sum()
+
+                # Update the mismatched dictionary:
+                mismatched_dict[chrom] = (mismatched_dict[chrom] | mismatch_below_thres | mismatch_above_thres)
+
+    return mismatched_dict
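
A hedged usage sketch (assumes `gdl` is a GWADataLoader with harmonized summary statistics and LD matrices attached):

```python
mismatched = identify_mismatched_snps(gdl, chrom=22)

# The result maps each chromosome to a boolean array over its variants;
# True marks SNPs whose Z-scores conflict with the LD reference panel.
print(f"Flagged {mismatched[22].sum()} variants on chromosome 22.")
```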

match_chromosomes(chrom_1, chrom_2, check_patterns=('chr_', 'chr:', 'chr'), return_both=False)

Given two lists of chromosome IDs, this function returns the chromosomes that are common to both lists. By default, the returned chromosomes follow the data type and order of the first list. If return_both is set to True, the function returns the common chromosomes in both lists.

The function also accounts for common ways to encode chromosomes, such as chr18, chr_18, 18, etc.

Parameters:

- chrom_1 (required): A list or numpy array of chromosome IDs.
- chrom_2 (required): A list or numpy array of chromosome IDs.
- check_patterns (default: ('chr_', 'chr:', 'chr')): A list of patterns to check for and replace in the chromosome IDs.
- return_both (default: False): If True, return the common chromosomes in both lists.

Source code in magenpy/utils/model_utils.py
def match_chromosomes(chrom_1, chrom_2, check_patterns=('chr_', 'chr:', 'chr'), return_both=False):
+    """
+    Given two lists of chromosome IDs, this function returns the
+    chromosomes that are common to both lists. By default, the returned chromosomes
+    follow the data type and order of the first list. If `return_both` is set to True,
+    the function returns the common chromosomes in both lists.
+
+    The function also accounts for common ways to encode chromosomes, such as
+    chr18, chr_18, 18, etc.
+
+    :param chrom_1: A list or numpy array of chromosome IDs
+    :param chrom_2: A list or numpy array of chromosome IDs
+    :param check_patterns: A list of patterns to check for and replace in the chromosome IDs
+    :param return_both: If True, return the common chromosomes in both lists
+    """
+
+    chrom_1 = np.array(list(chrom_1))
+    chrom_2 = np.array(list(chrom_2))
+
+    # First, convert the data types to strings:
+    chr1_str = chrom_1.astype(str)
+    chr2_str = chrom_2.astype(str)
+
+    _, chr1_idx, chr2_idx = np.intersect1d(chr1_str, chr2_str, return_indices=True)
+
+    if len(chr1_idx) < 1:
+        # Replace patterns
+        for pattern in check_patterns:
+            chr1_str = np.char.replace(chr1_str, pattern, '')
+            chr2_str = np.char.replace(chr2_str, pattern, '')
+
+        _, chr1_idx, chr2_idx = np.intersect1d(chr1_str, chr2_str, return_indices=True)
+
+    if len(chr1_idx) < 1:
+        if return_both:
+            return [], []
+        else:
+            return []
+    else:
+        if return_both:
+            return chrom_1[chr1_idx], chrom_2[chr2_idx]
+        else:
+            return chrom_1[chr1_idx]
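
Example with mixed encodings (the shared chromosomes come back in the data type and order of the first list):

```python
import numpy as np

c1 = np.array([20, 21, 22])
c2 = ['chr21', 'chr22', 'chrX']

print(match_chromosomes(c1, c2))  # [21 22]
```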

merge_snp_tables(ref_table, alt_table, how='inner', on='auto', signed_statistics=('BETA', 'STD_BETA', 'Z'), drop_duplicates=True, correct_flips=True, return_ref_indices=False, return_alt_indices=False)

This function takes a reference SNP table with at least 3 columns ('SNP', 'A1', 'A2') and matches it with an alternative table that also has these 3 columns defined. In the most recent implementation, we allow users to merge on any set of columns that they wish by specifying the on parameter. For example, instead of SNP, the user can join the SNP tables on CHR and POS, the chromosome number and base pair position of the SNP.

The manner in which the join operation takes place depends on the how argument. Currently, the function supports inner and left joins.

The function removes duplicates if the drop_duplicates parameter is set to True.

If correct_flips is set to True, the function will correct summary statistics in the alternative table alt_table (e.g. BETA, MAF) based on whether the A1 alleles agree between the two tables.

Parameters:

- ref_table (required): The reference table (a pandas DataFrame).
- alt_table (required): The alternative table (a pandas DataFrame).
- how (default: 'inner'): The join type, 'inner' or 'left'.
- on (default: 'auto'): Which columns to use as anchors when merging. By default, we automatically infer which columns to use, but the user can specify this directly. When on='auto', we try to use SNP (i.e. rsID) if available. If not, we use ['CHR', 'POS']. If neither is available, we raise a ValueError.
- signed_statistics (default: ('BETA', 'STD_BETA', 'Z')): The columns with signed statistics to flip if correct_flips is set to True.
- drop_duplicates (default: True): Drop duplicate SNPs.
- correct_flips (default: True): Correct SNP summary statistics that depend on the status of the alternative allele.
- return_ref_indices (default: False): Return the indices of the remaining entries in the reference table before merging.
- return_alt_indices (default: False): Return the indices of the remaining entries in the alternative table before merging.

Source code in magenpy/utils/model_utils.py
def merge_snp_tables(ref_table,
+                     alt_table,
+                     how='inner',
+                     on='auto',
+                     signed_statistics=('BETA', 'STD_BETA', 'Z'),
+                     drop_duplicates=True,
+                     correct_flips=True,
+                     return_ref_indices=False,
+                     return_alt_indices=False):
+    """
+    This function takes a reference SNP table with at least 3 columns ('SNP', 'A1', `A2`)
+    and matches it with an alternative table that also has these 3 columns defined. In the most recent
+    implementation, we allow users to merge on any set of columns that they wish by specifying the `on`
+    parameter. For example, instead of `SNP`, the user can join the SNP tables on `CHR` and `POS`, the
+    chromosome number and base pair position of the SNP.
+
+    The manner in which the join operation takes place depends on the `how` argument.
+    Currently, the function supports `inner` and `left` joins.
+
+    The function removes duplicates if `drop_duplicates` parameter is set to True
+
+    If `correct_flips` is set to True, the function will correct summary statistics in
+    the alternative table `alt_table` (e.g. BETA, MAF) based on whether the A1 alleles agree between the two tables.
+
+    :param ref_table: The reference table (pandas dataframe).
+    :param alt_table: The alternative table (pandas dataframe)
+    :param how: `inner` or `left`
+    :param on: Which columns to use as anchors when merging. By default, we automatically infer which columns
+    to use, but the user can specify this directly. When `on='auto'`, we try to use `SNP` (i.e. rsID) if available.
+    If not, we use `['CHR', 'POS']`. If neither are available, we raise a ValueError.
+    :param signed_statistics: The columns with signed statistics to flip if `correct_flips` is set to True.
+    :param drop_duplicates: Drop duplicate SNPs
+    :param correct_flips: Correct SNP summary statistics that depend on status of alternative allele
+    :param return_ref_indices: Return the indices of the remaining entries in the reference table before merging.
+    :param return_alt_indices: Return the indices of the remaining entries in the alternative table before merging.
+    """
+
+    # Sanity checking steps:
+    assert how in ('left', 'inner')
+    for tab in (ref_table, alt_table):
+        assert isinstance(tab, pd.DataFrame)
+        if not all([col in tab.columns for col in ('A1', 'A2')]):
+            raise ValueError("To merge SNP tables, we require that the columns `A1` and `A2` are present.")
+
+    if on == 'auto':
+        # Check that the `SNP` column is present in both tables:
+        if all(['SNP' in tab.columns for tab in (ref_table, alt_table)]):
+            on = ['SNP']
+        # Check that the `CHR`, `POS` columns are present in both tables:
+        elif all([col in tab.columns for col in ('CHR', 'POS') for tab in (ref_table, alt_table)]):
+            on = ['CHR', 'POS']
+        else:
+            raise ValueError("Cannot merge SNP tables without specifying which columns to merge on.")
+    elif isinstance(on, str):
+        on = [on]
+
+    ref_include = on + ['A1', 'A2']
+
+    if return_ref_indices:
+        ref_table.reset_index(inplace=True, names='REF_IDX')
+        ref_include += ['REF_IDX']
+    if return_alt_indices:
+        alt_table.reset_index(inplace=True, names='ALT_IDX')
+
+    merged_table = ref_table[ref_include].merge(alt_table, how=how, on=on)
+
+    if drop_duplicates:
+        merged_table.drop_duplicates(inplace=True, subset=on)
+
+    if how == 'left':
+        merged_table['A1_y'] = merged_table['A1_y'].fillna(merged_table['A1_x'])
+        merged_table['A2_y'] = merged_table['A2_y'].fillna(merged_table['A2_x'])
+
+    # Assign A1 to be the one derived from the reference table:
+    merged_table['A1'] = merged_table['A1_x']
+    merged_table['A2'] = merged_table['A2_x']
+
+    # Detect cases where the correct allele is specified in both tables:
+    matching_allele = np.all(merged_table[['A1_x', 'A2_x']].values == merged_table[['A1_y', 'A2_y']].values, axis=1)
+
+    # Detect cases where the effect and reference alleles are flipped:
+    flip = np.all(merged_table[['A2_x', 'A1_x']].values == merged_table[['A1_y', 'A2_y']].values, axis=1)
+
+    # Variants to keep:
+    if correct_flips:
+        keep_snps = matching_allele | flip
+    else:
+        keep_snps = matching_allele
+
+    # Keep only SNPs with matching alleles or SNPs with flipped alleles:
+    merged_table = merged_table.loc[keep_snps, ]
+
+    if correct_flips:
+
+        flip = flip[keep_snps].astype(int)
+        num_flips = flip.sum()
+
+        if num_flips > 0:
+
+            # If the user provided a single signed statistic as a string, convert to list first:
+            if isinstance(signed_statistics, str):
+                signed_statistics = [signed_statistics]
+
+            # Loop over the signed statistics and correct them:
+            for s_stat in signed_statistics:
+                if s_stat in merged_table:
+                    merged_table[s_stat] = (-2. * flip + 1.) * merged_table[s_stat]
+
+            # Correct MAF:
+            if 'MAF' in merged_table:
+                merged_table['MAF'] = np.abs(flip - merged_table['MAF'])
+
+    merged_table.drop(['A1_x', 'A1_y', 'A2_x', 'A2_y'], axis=1, inplace=True)
+
+    return merged_table
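
A worked example: the second table stores rs2 with swapped effect/reference alleles, so its BETA is sign-flipped during the merge:

```python
import pandas as pd

ref = pd.DataFrame({'SNP': ['rs1', 'rs2'], 'A1': ['A', 'T'], 'A2': ['G', 'C']})
alt = pd.DataFrame({'SNP': ['rs1', 'rs2'],
                    'A1': ['A', 'C'], 'A2': ['G', 'T'],
                    'BETA': [0.1, 0.2]})

merged = merge_snp_tables(ref, alt)
print(merged[['SNP', 'A1', 'A2', 'BETA']])
#    SNP A1 A2  BETA
# 0  rs1  A  G   0.1
# 1  rs2  T  C  -0.2
```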

multinomial_rvs(n, p)

Copied from Warren Weckesser: https://stackoverflow.com/a/55830796

Sample from the multinomial distribution with multiple p vectors.

- n must be a scalar.
- p must be an n-dimensional numpy array, n >= 1. The last axis of p holds the sequence of probabilities for a multinomial distribution.

The return value has the same shape as p.

Source code in magenpy/utils/model_utils.py
def multinomial_rvs(n, p):
+    """
+    Copied from Warren Weckesser:
+    https://stackoverflow.com/a/55830796
+
+    Sample from the multinomial distribution with multiple p vectors.
+
+    * n must be a scalar.
+    * p must be an n-dimensional numpy array, n >= 1.  The last axis of p
+      holds the sequence of probabilities for a multinomial distribution.
+
+    The return value has the same shape as p.
+    """
+    count = np.full(p.shape[:-1], n)
+    out = np.zeros(p.shape, dtype=int)
+    ps = p.cumsum(axis=-1)
+    # Conditional probabilities
+    with np.errstate(divide='ignore', invalid='ignore'):
+        condp = p / ps
+    condp[np.isnan(condp)] = 0.0
+    for i in range(p.shape[-1]-1, 0, -1):
+        binsample = np.random.binomial(count, condp[..., i])
+        out[..., i] = binsample
+        count -= binsample
+    out[..., 0] = count
+    return out
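
Example: one multinomial draw per row of a (2, 3) probability array; counts along the last axis sum to n:

```python
import numpy as np

np.random.seed(1)
p = np.array([[0.2, 0.3, 0.5],
              [0.6, 0.3, 0.1]])

draws = multinomial_rvs(100, p)
print(draws.sum(axis=-1))  # [100 100]
```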

quantize(floats, int_dtype=np.int8)

Quantize floating point numbers to the specified integer type. NOTE: Assumes that the floats are in the range [-1, 1].

Parameters:

- floats (required): A numpy array of floats.
- int_dtype (default: np.int8): The integer type to quantize to.

Source code in magenpy/utils/model_utils.py
def quantize(floats, int_dtype=np.int8):
+    """
+    Quantize floating point numbers to the specified integer type.
+    NOTE: Assumes that the floats are in the range [-1, 1].
+    :param floats: A numpy array of floats
+    :param int_dtype: The integer type to quantize to.
+    """
+
+    # Infer the boundaries from the integer type
+    info = np.iinfo(int_dtype)
+
+    # Compute the scale and zero point
+    # NOTE: We add 1 to the info.min here to force the zero point to be exactly at 0.
+    # See discussions on Scale Quantization Mapping.
+    scale = 2. / (info.max - (info.min + 1))
+
+    # Quantize the floats to int
+    return np.clip((floats / scale).round(), info.min, info.max).astype(int_dtype)
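
A round-trip illustration with `dequantize` above: for int8, the scale is 2 / 254 with the zero point forced to 0, so the reconstruction error is bounded by one quantization step:

```python
import numpy as np

x = np.array([-1., -0.5, 0., 0.5, 1.], dtype=np.float32)
q = quantize(x, int_dtype=np.int8)
x_hat = dequantize(q)

print(q)  # [-127  -64    0   64  127]
print(np.max(np.abs(x - x_hat)) < 2. / 254)  # True
```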

tree_to_rho(tree, min_corr)

This function takes a Biopython tree and a minimum correlation parameter and returns the correlation matrix for the effect sizes across populations.

Parameters:

- tree (required): A Biopython Phylo object.
- min_corr (required): The minimum correlation.

Returns:

- The correlation matrix for effect sizes across populations.

Source code in magenpy/utils/model_utils.py
def tree_to_rho(tree, min_corr):
+    """
+    This function takes a Biopython tree and a minimum correlation
+    parameter and returns the correlation matrix for the effect sizes
+    across populations.
+
+    :param tree: a Biopython Phylo object
+    :param min_corr: minimum correlation
+    :return:
+    """
+
+    max_depth = max(tree.depths().values())
+    tree.root.branch_length = min_corr*max_depth / (1. - min_corr)
+    max_depth = max(tree.depths().values())
+
+    for c in tree.find_clades():
+        c.branch_length /= max_depth
+
+    return tree.root.branch_length + get_shared_distance_matrix(tree)
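
A hedged sketch with a toy three-population tree (Biopython's Newick parser; population labels are illustrative):

```python
import io
from Bio import Phylo

tree = Phylo.read(io.StringIO("((EUR:1,AFR:1):1,EAS:2);"), "newick")
rho = tree_to_rho(tree, min_corr=0.5)

# 3x3 correlation matrix: unit diagonal, larger values for populations
# sharing more branch length, with a floor of min_corr for pairs whose
# MRCA is the root.
print(rho.shape)  # (3, 3)
```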

diff --git a/api/utils/system_utils/index.html b/api/utils/system_utils/index.html

System utils

available_cpu()

Get the number of available CPUs on the system.

Source code in magenpy/utils/system_utils.py
def available_cpu():
+    """
+    Get the number of available CPUs on the system.
+    """
+    return psutil.cpu_count() - 1

delete_temp_files(prefix)

Delete temporary files with the given prefix.

Parameters:

- prefix (required): A string with the prefix of the temporary files to delete.

Source code in magenpy/utils/system_utils.py
def delete_temp_files(prefix):
+    """
+    Delete temporary files with the given `prefix`.
+    :param prefix: A string with the prefix of the temporary files to delete.
+    """
+    for f in glob.glob(f"{prefix}*"):
+        try:
+            os.remove(f)
+        except Exception:
+            continue

get_filenames(path, extension=None)

Obtain valid and full path names given the provided path or prefix and extensions.

Parameters:

- path (required): A string with the path prefix or full path.
- extension (default: None): The extension of the class of files to search for.

Source code in magenpy/utils/system_utils.py
def get_filenames(path, extension=None):
+    """
+    Obtain valid and full path names given the provided `path` or prefix and extensions.
+
+    :param path: A string with the path prefix or full path.
+    :param extension: The extension for the class of files to search for.
+    """
+
+    if osp.isdir(path):
+        if extension:
+            if osp.isfile(osp.join(path, extension)):
+                return [path]
+            else:
+                return [f for f in glob.glob(osp.join(path, '*/'))
+                        if extension in f or osp.isfile(osp.join(f, extension))]
+        else:
+            return glob.glob(osp.join(path, '*'))
+    else:
+        if extension is None:
+            return glob.glob(path + '*')
+        elif extension in path:
+            return glob.glob(path)
+        elif osp.isfile(path + extension):
+            return [path + extension]
+        else:
+            return (
+                    glob.glob(osp.join(path, '*' + extension + '*')) +
+                    glob.glob(osp.join(path, extension + '*')) +
+                    glob.glob(path + '*' + extension + '*')
+            )
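
Example calls (paths are illustrative):

```python
# Match by prefix: returns every file starting with the prefix.
bed_related = get_filenames('data/1000G_eur_chr22')

# Match by extension: narrows the search to files matching `.bed`.
bed_only = get_filenames('data/1000G_eur_chr22', extension='.bed')
```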

get_memory_usage()

Get the memory usage of the current process in megabytes (MB).

Source code in magenpy/utils/system_utils.py
def get_memory_usage():
+    """
+    Get the memory usage of the current process in megabytes (MB).
+    """
+    process = psutil.Process(os.getpid())
+    mem_info = process.memory_info()
+    return mem_info.rss / (1024 ** 2)

is_cmd_tool(name)

Check whether name is on PATH and marked as executable. From: https://stackoverflow.com/a/34177358

Parameters:

- name (required): A string with the name of the command-line tool.

Source code in magenpy/utils/system_utils.py
def is_cmd_tool(name):
+    """
+    Check whether `name` is on PATH and marked as executable.
+    From: https://stackoverflow.com/a/34177358
+    :param name: A string with the name of the command-line tool.
+    """
+    from shutil import which
+
+    return which(name) is not None

is_path_writable(path)

Check whether the user has write-access to the provided path. This function supports checking for nested directories (i.e., we iterate upwards until finding a parent directory that currently exists, and we check the write-access of that directory).

Parameters:

- path (required): A string with the path to check.

Source code in magenpy/utils/system_utils.py
def is_path_writable(path):
+    """
+    Check whether the user has write-access to the provided `path`.
+    This function supports checking for nested directories (i.e.,
+    we iterate upwards until finding a parent directory that currently
+    exists, and we check the write-access of that directory).
+    :param path: A string with the path to check.
+    """
+
+    # Get the absolute path first:
+    path = osp.abspath(path)
+
+    while True:
+
+        if osp.exists(path):
+            return os.access(path, os.W_OK)
+        else:
+            path = osp.dirname(path)
+            if path == '/' or len(path) == 0:
+                return False

makedir(dirs)

Create directories on the filesystem, recursively.

Parameters:

- dirs (required): A string or list of strings with the paths to create.

Source code in magenpy/utils/system_utils.py
def makedir(dirs):
+    """
+    Create directories on the filesystem, recursively.
+    :param dirs: A string or list of strings with the paths to create.
+    """
+
+    if isinstance(dirs, str):
+        dirs = [dirs]
+
+    for dir_l in dirs:
+        try:
+            os.makedirs(dir_l)
+        except OSError as e:
+            if e.errno != errno.EEXIST:
+                raise
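
For example (editorial addition with illustrative directory names). Note that already-existing directories are tolerated, since EEXIST errors are swallowed:

    from magenpy.utils.system_utils import makedir

    makedir(['output/plots', 'output/tables'])  # creates both, recursively
    makedir('output/plots')  # calling again is a no-op rather than an error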

run_shell_script(cmd)

Run a shell command, given as a string in cmd.

Parameters:

    cmd (required) -- A string with the shell command to run.

Source code in magenpy/utils/system_utils.py:

+def run_shell_script(cmd):
+    """
+    Run the shell script given the command prompt in `cmd`.
+    :param cmd: A string with the shell command to run.
+    """
+
+    result = subprocess.run(cmd, shell=True, capture_output=True)
+
+    if result.stderr:
+        raise subprocess.CalledProcessError(
+            returncode=result.returncode,
+            cmd=result.args,
+            stderr=result.stderr
+        )
+
+    return result
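
A usage sketch (editorial addition). One caveat worth knowing from the source above: the function raises CalledProcessError whenever the command writes anything to stderr, even if it exits with status 0:

    from magenpy.utils.system_utils import run_shell_script

    result = run_shell_script('echo hello')
    print(result.stdout.decode().strip())  # prints: hello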

valid_url(path)

Check whether the provided path is a valid URL.

Parameters:

    path (required) -- A string with the URL to check.

Source code in magenpy/utils/system_utils.py:

+def valid_url(path):
+    """
+    Check whether the provided `path` is a valid URL.
+    :param path: A string with the URL to check.
+    """
+
+    import urllib.request
+
+    try:
+        with urllib.request.urlopen(path) as response:
+            return response.getcode() == 200  # Check if the response status is OK (HTTP 200)
+    except Exception:
+        return False
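
Note that this check actually opens the URL, so it needs network access and may be slow. A sketch with illustrative inputs (editorial addition):

    from magenpy.utils.system_utils import valid_url

    print(valid_url('https://example.com'))  # True if the server answers with HTTP 200
    print(valid_url('not-a-url'))            # False: urlopen raises and the exception is caught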
\ No newline at end of file
diff --git a/assets/_mkdocstrings.css b/assets/_mkdocstrings.css
new file mode 100644
index 0000000..57a23e1
--- /dev/null
+++ b/assets/_mkdocstrings.css
@@ -0,0 +1,114 @@
+
+/* Avoid breaking parameter names, etc. in table cells. */
+.doc-contents td code {
+  word-break: normal !important;
+}
+
+/* No line break before first paragraph of descriptions. */
+.doc-md-description,
+.doc-md-description>p:first-child {
+  display: inline;
+}
+
+/* Max width for docstring sections tables. */
+.doc .md-typeset__table,
+.doc .md-typeset__table table {
+  display: table !important;
+  width: 100%;
+}
+
+.doc .md-typeset__table tr {
+  display: table-row;
+}
+
+/* Defaults in Spacy table style. */
+.doc-param-default {
+  float: right;
+}
+
+/* Symbols in Navigation and ToC. */
+:root,
+[data-md-color-scheme="default"] {
+  --doc-symbol-attribute-fg-color: #953800;
+  --doc-symbol-function-fg-color: #8250df;
+  --doc-symbol-method-fg-color: #8250df;
+  --doc-symbol-class-fg-color: #0550ae;
+  --doc-symbol-module-fg-color: #5cad0f;
+
+  --doc-symbol-attribute-bg-color: #9538001a;
+  --doc-symbol-function-bg-color: #8250df1a;
+  --doc-symbol-method-bg-color: #8250df1a;
+  --doc-symbol-class-bg-color: #0550ae1a;
+  --doc-symbol-module-bg-color: #5cad0f1a;
+}
+
+[data-md-color-scheme="slate"] {
+  --doc-symbol-attribute-fg-color: #ffa657;
+  --doc-symbol-function-fg-color: #d2a8ff;
+  --doc-symbol-method-fg-color: #d2a8ff;
+  --doc-symbol-class-fg-color: #79c0ff;
+  --doc-symbol-module-fg-color: #baff79;
+
+  --doc-symbol-attribute-bg-color: #ffa6571a;
+  --doc-symbol-function-bg-color: #d2a8ff1a;
+  --doc-symbol-method-bg-color: #d2a8ff1a;
+  --doc-symbol-class-bg-color: #79c0ff1a;
+  --doc-symbol-module-bg-color: #baff791a;
+}
+
+code.doc-symbol {
+  border-radius: .1rem;
+  font-size: .85em;
+  padding: 0 .3em;
+  font-weight: bold;
+}
+
+code.doc-symbol-attribute {
+  color: var(--doc-symbol-attribute-fg-color);
+  background-color: var(--doc-symbol-attribute-bg-color);
+}
+
+code.doc-symbol-attribute::after {
+  content: "attr";
+}
+
+code.doc-symbol-function {
+  color: var(--doc-symbol-function-fg-color);
+  background-color: var(--doc-symbol-function-bg-color);
+}
+
+code.doc-symbol-function::after {
+  content: "func";
+}
+
+code.doc-symbol-method {
+  color: var(--doc-symbol-method-fg-color);
+  background-color: var(--doc-symbol-method-bg-color);
+}
+
+code.doc-symbol-method::after {
+  content: "meth";
+}
+
+code.doc-symbol-class {
+  color: var(--doc-symbol-class-fg-color);
+  background-color: var(--doc-symbol-class-bg-color);
+}
+
+code.doc-symbol-class::after {
+  content: "class";
+}
+
+code.doc-symbol-module {
+  color: var(--doc-symbol-module-fg-color);
+  background-color: var(--doc-symbol-module-bg-color);
+}
+
+code.doc-symbol-module::after {
+  content: "mod";
+}
+
+.doc-signature .autorefs {
+  color: inherit;
+  border-bottom: 1px dotted currentcolor;
+}
diff --git a/assets/images/favicon.png b/assets/images/favicon.png
new file mode 100644
index 0000000000000000000000000000000000000000..1cf13b9f9d978896599290a74f77d5dbe7d1655c
GIT binary patch
literal 1870
[1870 bytes of binary favicon data omitted]
z^GF3VAW^Mdv!)4OM8EgqRiz~*Cji;uzl2uC9^=8I84vNp;ltJ|q-*uQwGp2ma6cY7 z;`%`!9UXO@fr&Ebapfs34OmS9^u6$)bJxrucutf>`dKPKT%%*d3XlFVKunp9 zasduxjrjs>f8V=D|J=XNZp;_Zy^WgQ$9WDjgY=z@stwiEBm9u5*|34&1Na8BMjjgf3+SHcr`5~>oz1Y?SW^=K z^bTyO6>Gar#P_W2gEMwq)ot3; zREHn~U&Dp0l6YT0&k-wLwYjb?5zGK`W6S2v+K>AM(95m2C20L|3m~rN8dprPr@t)5lsk9Hu*W z?pS990s;Ez=+Rj{x7p``4>+c0G5^pYnB1^!TL=(?HLHZ+HicG{~4F1d^5Awl_2!1jICM-!9eoLhbbT^;yHcefyTAaqRcY zmuctDopPT!%k+}x%lZRKnzykr2}}XfG_ne?nRQO~?%hkzo;@RN{P6o`&mMUWBYMTe z6i8ChtjX&gXl`nvrU>jah)2iNM%JdjqoaeaU%yVn!^70x-flljp6Q5tK}5}&X8&&G zX3fpb3E(!rH=zVI_9Gjl45w@{(ITqngWFe7@9{mX;tO25Z_8 zQHEpI+FkTU#4xu>RkN>b3Tnc3UpWzPXWm#o55GKF09j^Mh~)K7{QqbO_~(@CVq! zS<8954|P8mXN2MRs86xZ&Q4EfM@JB94b=(YGuk)s&^jiSF=t3*oNK3`rD{H`yQ?d; ztE=laAUoZx5?RC8*WKOj`%LXEkgDd>&^Q4M^z`%u0rg-It=hLCVsq!Z%^6eB-OvOT zFZ28TN&cRmgU}Elrnk43)!>Z1FCPL2K$7}gwzIc48NX}#!A1BpJP?#v5wkNprhV** z?Cpalt1oH&{r!o3eSKc&ap)iz2BTn_VV`4>9M^b3;(YY}4>#ML6{~(4mH+?%07*qo IM6N<$f(jP3KmY&$ literal 0 HcmV?d00001 diff --git a/assets/javascripts/bundle.1e8ae164.min.js b/assets/javascripts/bundle.1e8ae164.min.js new file mode 100644 index 0000000..2129798 --- /dev/null +++ b/assets/javascripts/bundle.1e8ae164.min.js @@ -0,0 +1,29 @@ +"use strict";(()=>{var _i=Object.create;var br=Object.defineProperty;var Ai=Object.getOwnPropertyDescriptor;var Ci=Object.getOwnPropertyNames,Ft=Object.getOwnPropertySymbols,ki=Object.getPrototypeOf,vr=Object.prototype.hasOwnProperty,eo=Object.prototype.propertyIsEnumerable;var Zr=(e,t,r)=>t in e?br(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,F=(e,t)=>{for(var r in t||(t={}))vr.call(t,r)&&Zr(e,r,t[r]);if(Ft)for(var r of Ft(t))eo.call(t,r)&&Zr(e,r,t[r]);return e};var to=(e,t)=>{var r={};for(var o in e)vr.call(e,o)&&t.indexOf(o)<0&&(r[o]=e[o]);if(e!=null&&Ft)for(var o of Ft(e))t.indexOf(o)<0&&eo.call(e,o)&&(r[o]=e[o]);return r};var gr=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var Hi=(e,t,r,o)=>{if(t&&typeof t=="object"||typeof t=="function")for(let n of Ci(t))!vr.call(e,n)&&n!==r&&br(e,n,{get:()=>t[n],enumerable:!(o=Ai(t,n))||o.enumerable});return e};var jt=(e,t,r)=>(r=e!=null?_i(ki(e)):{},Hi(t||!e||!e.__esModule?br(r,"default",{value:e,enumerable:!0}):r,e));var ro=(e,t,r)=>new Promise((o,n)=>{var i=c=>{try{s(r.next(c))}catch(p){n(p)}},a=c=>{try{s(r.throw(c))}catch(p){n(p)}},s=c=>c.done?o(c.value):Promise.resolve(c.value).then(i,a);s((r=r.apply(e,t)).next())});var no=gr((xr,oo)=>{(function(e,t){typeof xr=="object"&&typeof oo!="undefined"?t():typeof define=="function"&&define.amd?define(t):t()})(xr,function(){"use strict";function e(r){var o=!0,n=!1,i=null,a={text:!0,search:!0,url:!0,tel:!0,email:!0,password:!0,number:!0,date:!0,month:!0,week:!0,time:!0,datetime:!0,"datetime-local":!0};function s(C){return!!(C&&C!==document&&C.nodeName!=="HTML"&&C.nodeName!=="BODY"&&"classList"in C&&"contains"in C.classList)}function c(C){var ct=C.type,Ne=C.tagName;return!!(Ne==="INPUT"&&a[ct]&&!C.readOnly||Ne==="TEXTAREA"&&!C.readOnly||C.isContentEditable)}function p(C){C.classList.contains("focus-visible")||(C.classList.add("focus-visible"),C.setAttribute("data-focus-visible-added",""))}function l(C){C.hasAttribute("data-focus-visible-added")&&(C.classList.remove("focus-visible"),C.removeAttribute("data-focus-visible-added"))}function f(C){C.metaKey||C.altKey||C.ctrlKey||(s(r.activeElement)&&p(r.activeElement),o=!0)}function u(C){o=!1}function h(C){s(C.target)&&(o||c(C.target))&&p(C.target)}function 
w(C){s(C.target)&&(C.target.classList.contains("focus-visible")||C.target.hasAttribute("data-focus-visible-added"))&&(n=!0,window.clearTimeout(i),i=window.setTimeout(function(){n=!1},100),l(C.target))}function A(C){document.visibilityState==="hidden"&&(n&&(o=!0),Z())}function Z(){document.addEventListener("mousemove",J),document.addEventListener("mousedown",J),document.addEventListener("mouseup",J),document.addEventListener("pointermove",J),document.addEventListener("pointerdown",J),document.addEventListener("pointerup",J),document.addEventListener("touchmove",J),document.addEventListener("touchstart",J),document.addEventListener("touchend",J)}function te(){document.removeEventListener("mousemove",J),document.removeEventListener("mousedown",J),document.removeEventListener("mouseup",J),document.removeEventListener("pointermove",J),document.removeEventListener("pointerdown",J),document.removeEventListener("pointerup",J),document.removeEventListener("touchmove",J),document.removeEventListener("touchstart",J),document.removeEventListener("touchend",J)}function J(C){C.target.nodeName&&C.target.nodeName.toLowerCase()==="html"||(o=!1,te())}document.addEventListener("keydown",f,!0),document.addEventListener("mousedown",u,!0),document.addEventListener("pointerdown",u,!0),document.addEventListener("touchstart",u,!0),document.addEventListener("visibilitychange",A,!0),Z(),r.addEventListener("focus",h,!0),r.addEventListener("blur",w,!0),r.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&r.host?r.host.setAttribute("data-js-focus-visible",""):r.nodeType===Node.DOCUMENT_NODE&&(document.documentElement.classList.add("js-focus-visible"),document.documentElement.setAttribute("data-js-focus-visible",""))}if(typeof window!="undefined"&&typeof document!="undefined"){window.applyFocusVisiblePolyfill=e;var t;try{t=new CustomEvent("focus-visible-polyfill-ready")}catch(r){t=document.createEvent("CustomEvent"),t.initCustomEvent("focus-visible-polyfill-ready",!1,!1,{})}window.dispatchEvent(t)}typeof document!="undefined"&&e(document)})});var zr=gr((kt,Vr)=>{/*! 
+ * clipboard.js v2.0.11 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */(function(t,r){typeof kt=="object"&&typeof Vr=="object"?Vr.exports=r():typeof define=="function"&&define.amd?define([],r):typeof kt=="object"?kt.ClipboardJS=r():t.ClipboardJS=r()})(kt,function(){return function(){var e={686:function(o,n,i){"use strict";i.d(n,{default:function(){return Li}});var a=i(279),s=i.n(a),c=i(370),p=i.n(c),l=i(817),f=i.n(l);function u(D){try{return document.execCommand(D)}catch(M){return!1}}var h=function(M){var O=f()(M);return u("cut"),O},w=h;function A(D){var M=document.documentElement.getAttribute("dir")==="rtl",O=document.createElement("textarea");O.style.fontSize="12pt",O.style.border="0",O.style.padding="0",O.style.margin="0",O.style.position="absolute",O.style[M?"right":"left"]="-9999px";var I=window.pageYOffset||document.documentElement.scrollTop;return O.style.top="".concat(I,"px"),O.setAttribute("readonly",""),O.value=D,O}var Z=function(M,O){var I=A(M);O.container.appendChild(I);var W=f()(I);return u("copy"),I.remove(),W},te=function(M){var O=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body},I="";return typeof M=="string"?I=Z(M,O):M instanceof HTMLInputElement&&!["text","search","url","tel","password"].includes(M==null?void 0:M.type)?I=Z(M.value,O):(I=f()(M),u("copy")),I},J=te;function C(D){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?C=function(O){return typeof O}:C=function(O){return O&&typeof Symbol=="function"&&O.constructor===Symbol&&O!==Symbol.prototype?"symbol":typeof O},C(D)}var ct=function(){var M=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},O=M.action,I=O===void 0?"copy":O,W=M.container,K=M.target,Ce=M.text;if(I!=="copy"&&I!=="cut")throw new Error('Invalid "action" value, use either "copy" or "cut"');if(K!==void 0)if(K&&C(K)==="object"&&K.nodeType===1){if(I==="copy"&&K.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. Please use "readonly" instead of "disabled" attribute');if(I==="cut"&&(K.hasAttribute("readonly")||K.hasAttribute("disabled")))throw new Error(`Invalid "target" attribute. 
You can't cut text from elements with "readonly" or "disabled" attributes`)}else throw new Error('Invalid "target" value, use a valid Element');if(Ce)return J(Ce,{container:W});if(K)return I==="cut"?w(K):J(K,{container:W})},Ne=ct;function Pe(D){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?Pe=function(O){return typeof O}:Pe=function(O){return O&&typeof Symbol=="function"&&O.constructor===Symbol&&O!==Symbol.prototype?"symbol":typeof O},Pe(D)}function xi(D,M){if(!(D instanceof M))throw new TypeError("Cannot call a class as a function")}function Xr(D,M){for(var O=0;O0&&arguments[0]!==void 0?arguments[0]:{};this.action=typeof W.action=="function"?W.action:this.defaultAction,this.target=typeof W.target=="function"?W.target:this.defaultTarget,this.text=typeof W.text=="function"?W.text:this.defaultText,this.container=Pe(W.container)==="object"?W.container:document.body}},{key:"listenClick",value:function(W){var K=this;this.listener=p()(W,"click",function(Ce){return K.onClick(Ce)})}},{key:"onClick",value:function(W){var K=W.delegateTarget||W.currentTarget,Ce=this.action(K)||"copy",It=Ne({action:Ce,container:this.container,target:this.target(K),text:this.text(K)});this.emit(It?"success":"error",{action:Ce,text:It,trigger:K,clearSelection:function(){K&&K.focus(),window.getSelection().removeAllRanges()}})}},{key:"defaultAction",value:function(W){return hr("action",W)}},{key:"defaultTarget",value:function(W){var K=hr("target",W);if(K)return document.querySelector(K)}},{key:"defaultText",value:function(W){return hr("text",W)}},{key:"destroy",value:function(){this.listener.destroy()}}],[{key:"copy",value:function(W){var K=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body};return J(W,K)}},{key:"cut",value:function(W){return w(W)}},{key:"isSupported",value:function(){var W=arguments.length>0&&arguments[0]!==void 0?arguments[0]:["copy","cut"],K=typeof W=="string"?[W]:W,Ce=!!document.queryCommandSupported;return K.forEach(function(It){Ce=Ce&&!!document.queryCommandSupported(It)}),Ce}}]),O}(s()),Li=Mi},828:function(o){var n=9;if(typeof Element!="undefined"&&!Element.prototype.matches){var i=Element.prototype;i.matches=i.matchesSelector||i.mozMatchesSelector||i.msMatchesSelector||i.oMatchesSelector||i.webkitMatchesSelector}function a(s,c){for(;s&&s.nodeType!==n;){if(typeof s.matches=="function"&&s.matches(c))return s;s=s.parentNode}}o.exports=a},438:function(o,n,i){var a=i(828);function s(l,f,u,h,w){var A=p.apply(this,arguments);return l.addEventListener(u,A,w),{destroy:function(){l.removeEventListener(u,A,w)}}}function c(l,f,u,h,w){return typeof l.addEventListener=="function"?s.apply(null,arguments):typeof u=="function"?s.bind(null,document).apply(null,arguments):(typeof l=="string"&&(l=document.querySelectorAll(l)),Array.prototype.map.call(l,function(A){return s(A,f,u,h,w)}))}function p(l,f,u,h){return function(w){w.delegateTarget=a(w.target,f),w.delegateTarget&&h.call(l,w)}}o.exports=c},879:function(o,n){n.node=function(i){return i!==void 0&&i instanceof HTMLElement&&i.nodeType===1},n.nodeList=function(i){var a=Object.prototype.toString.call(i);return i!==void 0&&(a==="[object NodeList]"||a==="[object HTMLCollection]")&&"length"in i&&(i.length===0||n.node(i[0]))},n.string=function(i){return typeof i=="string"||i instanceof String},n.fn=function(i){var a=Object.prototype.toString.call(i);return a==="[object Function]"}},370:function(o,n,i){var a=i(879),s=i(438);function c(u,h,w){if(!u&&!h&&!w)throw new Error("Missing required 
arguments");if(!a.string(h))throw new TypeError("Second argument must be a String");if(!a.fn(w))throw new TypeError("Third argument must be a Function");if(a.node(u))return p(u,h,w);if(a.nodeList(u))return l(u,h,w);if(a.string(u))return f(u,h,w);throw new TypeError("First argument must be a String, HTMLElement, HTMLCollection, or NodeList")}function p(u,h,w){return u.addEventListener(h,w),{destroy:function(){u.removeEventListener(h,w)}}}function l(u,h,w){return Array.prototype.forEach.call(u,function(A){A.addEventListener(h,w)}),{destroy:function(){Array.prototype.forEach.call(u,function(A){A.removeEventListener(h,w)})}}}function f(u,h,w){return s(document.body,u,h,w)}o.exports=c},817:function(o){function n(i){var a;if(i.nodeName==="SELECT")i.focus(),a=i.value;else if(i.nodeName==="INPUT"||i.nodeName==="TEXTAREA"){var s=i.hasAttribute("readonly");s||i.setAttribute("readonly",""),i.select(),i.setSelectionRange(0,i.value.length),s||i.removeAttribute("readonly"),a=i.value}else{i.hasAttribute("contenteditable")&&i.focus();var c=window.getSelection(),p=document.createRange();p.selectNodeContents(i),c.removeAllRanges(),c.addRange(p),a=c.toString()}return a}o.exports=n},279:function(o){function n(){}n.prototype={on:function(i,a,s){var c=this.e||(this.e={});return(c[i]||(c[i]=[])).push({fn:a,ctx:s}),this},once:function(i,a,s){var c=this;function p(){c.off(i,p),a.apply(s,arguments)}return p._=a,this.on(i,p,s)},emit:function(i){var a=[].slice.call(arguments,1),s=((this.e||(this.e={}))[i]||[]).slice(),c=0,p=s.length;for(c;c{"use strict";/*! + * escape-html + * Copyright(c) 2012-2013 TJ Holowaychuk + * Copyright(c) 2015 Andreas Lubbe + * Copyright(c) 2015 Tiancheng "Timothy" Gu + * MIT Licensed + */var Va=/["'&<>]/;qn.exports=za;function za(e){var t=""+e,r=Va.exec(t);if(!r)return t;var o,n="",i=0,a=0;for(i=r.index;i0&&i[i.length-1])&&(p[0]===6||p[0]===2)){r=0;continue}if(p[0]===3&&(!i||p[1]>i[0]&&p[1]=e.length&&(e=void 0),{value:e&&e[o++],done:!e}}};throw new TypeError(t?"Object is not iterable.":"Symbol.iterator is not defined.")}function V(e,t){var r=typeof Symbol=="function"&&e[Symbol.iterator];if(!r)return e;var o=r.call(e),n,i=[],a;try{for(;(t===void 0||t-- >0)&&!(n=o.next()).done;)i.push(n.value)}catch(s){a={error:s}}finally{try{n&&!n.done&&(r=o.return)&&r.call(o)}finally{if(a)throw a.error}}return i}function z(e,t,r){if(r||arguments.length===2)for(var o=0,n=t.length,i;o1||s(u,h)})})}function s(u,h){try{c(o[u](h))}catch(w){f(i[0][3],w)}}function c(u){u.value instanceof ot?Promise.resolve(u.value.v).then(p,l):f(i[0][2],u)}function p(u){s("next",u)}function l(u){s("throw",u)}function f(u,h){u(h),i.shift(),i.length&&s(i[0][0],i[0][1])}}function so(e){if(!Symbol.asyncIterator)throw new TypeError("Symbol.asyncIterator is not defined.");var t=e[Symbol.asyncIterator],r;return t?t.call(e):(e=typeof ue=="function"?ue(e):e[Symbol.iterator](),r={},o("next"),o("throw"),o("return"),r[Symbol.asyncIterator]=function(){return this},r);function o(i){r[i]=e[i]&&function(a){return new Promise(function(s,c){a=e[i](a),n(s,c,a.done,a.value)})}}function n(i,a,s,c){Promise.resolve(c).then(function(p){i({value:p,done:s})},a)}}function k(e){return typeof e=="function"}function pt(e){var t=function(o){Error.call(o),o.stack=new Error().stack},r=e(t);return r.prototype=Object.create(Error.prototype),r.prototype.constructor=r,r}var Wt=pt(function(e){return function(r){e(this),this.message=r?r.length+` errors occurred during unsubscription: +`+r.map(function(o,n){return n+1+") "+o.toString()}).join(` + 
`):"",this.name="UnsubscriptionError",this.errors=r}});function Ve(e,t){if(e){var r=e.indexOf(t);0<=r&&e.splice(r,1)}}var Ie=function(){function e(t){this.initialTeardown=t,this.closed=!1,this._parentage=null,this._finalizers=null}return e.prototype.unsubscribe=function(){var t,r,o,n,i;if(!this.closed){this.closed=!0;var a=this._parentage;if(a)if(this._parentage=null,Array.isArray(a))try{for(var s=ue(a),c=s.next();!c.done;c=s.next()){var p=c.value;p.remove(this)}}catch(A){t={error:A}}finally{try{c&&!c.done&&(r=s.return)&&r.call(s)}finally{if(t)throw t.error}}else a.remove(this);var l=this.initialTeardown;if(k(l))try{l()}catch(A){i=A instanceof Wt?A.errors:[A]}var f=this._finalizers;if(f){this._finalizers=null;try{for(var u=ue(f),h=u.next();!h.done;h=u.next()){var w=h.value;try{co(w)}catch(A){i=i!=null?i:[],A instanceof Wt?i=z(z([],V(i)),V(A.errors)):i.push(A)}}}catch(A){o={error:A}}finally{try{h&&!h.done&&(n=u.return)&&n.call(u)}finally{if(o)throw o.error}}}if(i)throw new Wt(i)}},e.prototype.add=function(t){var r;if(t&&t!==this)if(this.closed)co(t);else{if(t instanceof e){if(t.closed||t._hasParent(this))return;t._addParent(this)}(this._finalizers=(r=this._finalizers)!==null&&r!==void 0?r:[]).push(t)}},e.prototype._hasParent=function(t){var r=this._parentage;return r===t||Array.isArray(r)&&r.includes(t)},e.prototype._addParent=function(t){var r=this._parentage;this._parentage=Array.isArray(r)?(r.push(t),r):r?[r,t]:t},e.prototype._removeParent=function(t){var r=this._parentage;r===t?this._parentage=null:Array.isArray(r)&&Ve(r,t)},e.prototype.remove=function(t){var r=this._finalizers;r&&Ve(r,t),t instanceof e&&t._removeParent(this)},e.EMPTY=function(){var t=new e;return t.closed=!0,t}(),e}();var Er=Ie.EMPTY;function Dt(e){return e instanceof Ie||e&&"closed"in e&&k(e.remove)&&k(e.add)&&k(e.unsubscribe)}function co(e){k(e)?e():e.unsubscribe()}var ke={onUnhandledError:null,onStoppedNotification:null,Promise:void 0,useDeprecatedSynchronousErrorHandling:!1,useDeprecatedNextContext:!1};var lt={setTimeout:function(e,t){for(var r=[],o=2;o0},enumerable:!1,configurable:!0}),t.prototype._trySubscribe=function(r){return this._throwIfClosed(),e.prototype._trySubscribe.call(this,r)},t.prototype._subscribe=function(r){return this._throwIfClosed(),this._checkFinalizedStatuses(r),this._innerSubscribe(r)},t.prototype._innerSubscribe=function(r){var o=this,n=this,i=n.hasError,a=n.isStopped,s=n.observers;return i||a?Er:(this.currentObservers=null,s.push(r),new Ie(function(){o.currentObservers=null,Ve(s,r)}))},t.prototype._checkFinalizedStatuses=function(r){var o=this,n=o.hasError,i=o.thrownError,a=o.isStopped;n?r.error(i):a&&r.complete()},t.prototype.asObservable=function(){var r=new j;return r.source=this,r},t.create=function(r,o){return new vo(r,o)},t}(j);var vo=function(e){se(t,e);function t(r,o){var n=e.call(this)||this;return n.destination=r,n.source=o,n}return t.prototype.next=function(r){var o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.next)===null||n===void 0||n.call(o,r)},t.prototype.error=function(r){var o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.error)===null||n===void 0||n.call(o,r)},t.prototype.complete=function(){var r,o;(o=(r=this.destination)===null||r===void 0?void 0:r.complete)===null||o===void 0||o.call(r)},t.prototype._subscribe=function(r){var o,n;return(n=(o=this.source)===null||o===void 0?void 0:o.subscribe(r))!==null&&n!==void 0?n:Er},t}(g);var St={now:function(){return(St.delegate||Date).now()},delegate:void 0};var Ot=function(e){se(t,e);function t(r,o,n){r===void 
0&&(r=1/0),o===void 0&&(o=1/0),n===void 0&&(n=St);var i=e.call(this)||this;return i._bufferSize=r,i._windowTime=o,i._timestampProvider=n,i._buffer=[],i._infiniteTimeWindow=!0,i._infiniteTimeWindow=o===1/0,i._bufferSize=Math.max(1,r),i._windowTime=Math.max(1,o),i}return t.prototype.next=function(r){var o=this,n=o.isStopped,i=o._buffer,a=o._infiniteTimeWindow,s=o._timestampProvider,c=o._windowTime;n||(i.push(r),!a&&i.push(s.now()+c)),this._trimBuffer(),e.prototype.next.call(this,r)},t.prototype._subscribe=function(r){this._throwIfClosed(),this._trimBuffer();for(var o=this._innerSubscribe(r),n=this,i=n._infiniteTimeWindow,a=n._buffer,s=a.slice(),c=0;c0?e.prototype.requestAsyncId.call(this,r,o,n):(r.actions.push(this),r._scheduled||(r._scheduled=ut.requestAnimationFrame(function(){return r.flush(void 0)})))},t.prototype.recycleAsyncId=function(r,o,n){var i;if(n===void 0&&(n=0),n!=null?n>0:this.delay>0)return e.prototype.recycleAsyncId.call(this,r,o,n);var a=r.actions;o!=null&&((i=a[a.length-1])===null||i===void 0?void 0:i.id)!==o&&(ut.cancelAnimationFrame(o),r._scheduled=void 0)},t}(zt);var yo=function(e){se(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t.prototype.flush=function(r){this._active=!0;var o=this._scheduled;this._scheduled=void 0;var n=this.actions,i;r=r||n.shift();do if(i=r.execute(r.state,r.delay))break;while((r=n[0])&&r.id===o&&n.shift());if(this._active=!1,i){for(;(r=n[0])&&r.id===o&&n.shift();)r.unsubscribe();throw i}},t}(qt);var de=new yo(xo);var L=new j(function(e){return e.complete()});function Kt(e){return e&&k(e.schedule)}function _r(e){return e[e.length-1]}function Je(e){return k(_r(e))?e.pop():void 0}function Ae(e){return Kt(_r(e))?e.pop():void 0}function Qt(e,t){return typeof _r(e)=="number"?e.pop():t}var dt=function(e){return e&&typeof e.length=="number"&&typeof e!="function"};function Yt(e){return k(e==null?void 0:e.then)}function Bt(e){return k(e[ft])}function Gt(e){return Symbol.asyncIterator&&k(e==null?void 0:e[Symbol.asyncIterator])}function Jt(e){return new TypeError("You provided "+(e!==null&&typeof e=="object"?"an invalid object":"'"+e+"'")+" where a stream was expected. 
You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.")}function Di(){return typeof Symbol!="function"||!Symbol.iterator?"@@iterator":Symbol.iterator}var Xt=Di();function Zt(e){return k(e==null?void 0:e[Xt])}function er(e){return ao(this,arguments,function(){var r,o,n,i;return Ut(this,function(a){switch(a.label){case 0:r=e.getReader(),a.label=1;case 1:a.trys.push([1,,9,10]),a.label=2;case 2:return[4,ot(r.read())];case 3:return o=a.sent(),n=o.value,i=o.done,i?[4,ot(void 0)]:[3,5];case 4:return[2,a.sent()];case 5:return[4,ot(n)];case 6:return[4,a.sent()];case 7:return a.sent(),[3,2];case 8:return[3,10];case 9:return r.releaseLock(),[7];case 10:return[2]}})})}function tr(e){return k(e==null?void 0:e.getReader)}function N(e){if(e instanceof j)return e;if(e!=null){if(Bt(e))return Ni(e);if(dt(e))return Vi(e);if(Yt(e))return zi(e);if(Gt(e))return Eo(e);if(Zt(e))return qi(e);if(tr(e))return Ki(e)}throw Jt(e)}function Ni(e){return new j(function(t){var r=e[ft]();if(k(r.subscribe))return r.subscribe(t);throw new TypeError("Provided object does not correctly implement Symbol.observable")})}function Vi(e){return new j(function(t){for(var r=0;r=2;return function(o){return o.pipe(e?b(function(n,i){return e(n,i,o)}):ce,ye(1),r?Qe(t):jo(function(){return new or}))}}function $r(e){return e<=0?function(){return L}:x(function(t,r){var o=[];t.subscribe(S(r,function(n){o.push(n),e=2,!0))}function le(e){e===void 0&&(e={});var t=e.connector,r=t===void 0?function(){return new g}:t,o=e.resetOnError,n=o===void 0?!0:o,i=e.resetOnComplete,a=i===void 0?!0:i,s=e.resetOnRefCountZero,c=s===void 0?!0:s;return function(p){var l,f,u,h=0,w=!1,A=!1,Z=function(){f==null||f.unsubscribe(),f=void 0},te=function(){Z(),l=u=void 0,w=A=!1},J=function(){var C=l;te(),C==null||C.unsubscribe()};return x(function(C,ct){h++,!A&&!w&&Z();var Ne=u=u!=null?u:r();ct.add(function(){h--,h===0&&!A&&!w&&(f=Pr(J,c))}),Ne.subscribe(ct),!l&&h>0&&(l=new it({next:function(Pe){return Ne.next(Pe)},error:function(Pe){A=!0,Z(),f=Pr(te,n,Pe),Ne.error(Pe)},complete:function(){w=!0,Z(),f=Pr(te,a),Ne.complete()}}),N(C).subscribe(l))})(p)}}function Pr(e,t){for(var r=[],o=2;oe.next(document)),e}function R(e,t=document){return Array.from(t.querySelectorAll(e))}function P(e,t=document){let r=me(e,t);if(typeof r=="undefined")throw new ReferenceError(`Missing element: expected "${e}" to be present`);return r}function me(e,t=document){return t.querySelector(e)||void 0}function Re(){var e,t,r,o;return(o=(r=(t=(e=document.activeElement)==null?void 0:e.shadowRoot)==null?void 0:t.activeElement)!=null?r:document.activeElement)!=null?o:void 0}var la=T(d(document.body,"focusin"),d(document.body,"focusout")).pipe(be(1),q(void 0),m(()=>Re()||document.body),B(1));function vt(e){return la.pipe(m(t=>e.contains(t)),Y())}function Vo(e,t){return T(d(e,"mouseenter").pipe(m(()=>!0)),d(e,"mouseleave").pipe(m(()=>!1))).pipe(t?be(t):ce,q(!1))}function Ue(e){return{x:e.offsetLeft,y:e.offsetTop}}function zo(e){return T(d(window,"load"),d(window,"resize")).pipe(Me(0,de),m(()=>Ue(e)),q(Ue(e)))}function ir(e){return{x:e.scrollLeft,y:e.scrollTop}}function et(e){return T(d(e,"scroll"),d(window,"resize")).pipe(Me(0,de),m(()=>ir(e)),q(ir(e)))}function qo(e,t){if(typeof t=="string"||typeof t=="number")e.innerHTML+=t.toString();else if(t instanceof Node)e.appendChild(t);else if(Array.isArray(t))for(let r of t)qo(e,r)}function E(e,t,...r){let o=document.createElement(e);if(t)for(let n of Object.keys(t))typeof t[n]!="undefined"&&(typeof 
t[n]!="boolean"?o.setAttribute(n,t[n]):o.setAttribute(n,""));for(let n of r)qo(o,n);return o}function ar(e){if(e>999){let t=+((e-950)%1e3>99);return`${((e+1e-6)/1e3).toFixed(t)}k`}else return e.toString()}function gt(e){let t=E("script",{src:e});return H(()=>(document.head.appendChild(t),T(d(t,"load"),d(t,"error").pipe(v(()=>Ar(()=>new ReferenceError(`Invalid script: ${e}`))))).pipe(m(()=>{}),_(()=>document.head.removeChild(t)),ye(1))))}var Ko=new g,ma=H(()=>typeof ResizeObserver=="undefined"?gt("https://unpkg.com/resize-observer-polyfill"):$(void 0)).pipe(m(()=>new ResizeObserver(e=>{for(let t of e)Ko.next(t)})),v(e=>T(qe,$(e)).pipe(_(()=>e.disconnect()))),B(1));function pe(e){return{width:e.offsetWidth,height:e.offsetHeight}}function Ee(e){return ma.pipe(y(t=>t.observe(e)),v(t=>Ko.pipe(b(({target:r})=>r===e),_(()=>t.unobserve(e)),m(()=>pe(e)))),q(pe(e)))}function xt(e){return{width:e.scrollWidth,height:e.scrollHeight}}function sr(e){let t=e.parentElement;for(;t&&(e.scrollWidth<=t.scrollWidth&&e.scrollHeight<=t.scrollHeight);)t=(e=t).parentElement;return t?e:void 0}var Qo=new g,fa=H(()=>$(new IntersectionObserver(e=>{for(let t of e)Qo.next(t)},{threshold:0}))).pipe(v(e=>T(qe,$(e)).pipe(_(()=>e.disconnect()))),B(1));function yt(e){return fa.pipe(y(t=>t.observe(e)),v(t=>Qo.pipe(b(({target:r})=>r===e),_(()=>t.unobserve(e)),m(({isIntersecting:r})=>r))))}function Yo(e,t=16){return et(e).pipe(m(({y:r})=>{let o=pe(e),n=xt(e);return r>=n.height-o.height-t}),Y())}var cr={drawer:P("[data-md-toggle=drawer]"),search:P("[data-md-toggle=search]")};function Bo(e){return cr[e].checked}function Be(e,t){cr[e].checked!==t&&cr[e].click()}function We(e){let t=cr[e];return d(t,"change").pipe(m(()=>t.checked),q(t.checked))}function ua(e,t){switch(e.constructor){case HTMLInputElement:return e.type==="radio"?/^Arrow/.test(t):!0;case HTMLSelectElement:case HTMLTextAreaElement:return!0;default:return e.isContentEditable}}function da(){return T(d(window,"compositionstart").pipe(m(()=>!0)),d(window,"compositionend").pipe(m(()=>!1))).pipe(q(!1))}function Go(){let e=d(window,"keydown").pipe(b(t=>!(t.metaKey||t.ctrlKey)),m(t=>({mode:Bo("search")?"search":"global",type:t.key,claim(){t.preventDefault(),t.stopPropagation()}})),b(({mode:t,type:r})=>{if(t==="global"){let o=Re();if(typeof o!="undefined")return!ua(o,r)}return!0}),le());return da().pipe(v(t=>t?L:e))}function ve(){return new URL(location.href)}function st(e,t=!1){if(G("navigation.instant")&&!t){let r=E("a",{href:e.href});document.body.appendChild(r),r.click(),r.remove()}else location.href=e.href}function Jo(){return new g}function Xo(){return location.hash.slice(1)}function Zo(e){let t=E("a",{href:e});t.addEventListener("click",r=>r.stopPropagation()),t.click()}function ha(e){return T(d(window,"hashchange"),e).pipe(m(Xo),q(Xo()),b(t=>t.length>0),B(1))}function en(e){return ha(e).pipe(m(t=>me(`[id="${t}"]`)),b(t=>typeof t!="undefined"))}function At(e){let t=matchMedia(e);return nr(r=>t.addListener(()=>r(t.matches))).pipe(q(t.matches))}function tn(){let e=matchMedia("print");return T(d(window,"beforeprint").pipe(m(()=>!0)),d(window,"afterprint").pipe(m(()=>!1))).pipe(q(e.matches))}function Ur(e,t){return e.pipe(v(r=>r?t():L))}function Wr(e,t){return new j(r=>{let o=new XMLHttpRequest;return o.open("GET",`${e}`),o.responseType="blob",o.addEventListener("load",()=>{o.status>=200&&o.status<300?(r.next(o.response),r.complete()):r.error(new Error(o.statusText))}),o.addEventListener("error",()=>{r.error(new Error("Network 
error"))}),o.addEventListener("abort",()=>{r.complete()}),typeof(t==null?void 0:t.progress$)!="undefined"&&(o.addEventListener("progress",n=>{var i;if(n.lengthComputable)t.progress$.next(n.loaded/n.total*100);else{let a=(i=o.getResponseHeader("Content-Length"))!=null?i:0;t.progress$.next(n.loaded/+a*100)}}),t.progress$.next(5)),o.send(),()=>o.abort()})}function De(e,t){return Wr(e,t).pipe(v(r=>r.text()),m(r=>JSON.parse(r)),B(1))}function rn(e,t){let r=new DOMParser;return Wr(e,t).pipe(v(o=>o.text()),m(o=>r.parseFromString(o,"text/html")),B(1))}function on(e,t){let r=new DOMParser;return Wr(e,t).pipe(v(o=>o.text()),m(o=>r.parseFromString(o,"text/xml")),B(1))}function nn(){return{x:Math.max(0,scrollX),y:Math.max(0,scrollY)}}function an(){return T(d(window,"scroll",{passive:!0}),d(window,"resize",{passive:!0})).pipe(m(nn),q(nn()))}function sn(){return{width:innerWidth,height:innerHeight}}function cn(){return d(window,"resize",{passive:!0}).pipe(m(sn),q(sn()))}function pn(){return Q([an(),cn()]).pipe(m(([e,t])=>({offset:e,size:t})),B(1))}function pr(e,{viewport$:t,header$:r}){let o=t.pipe(X("size")),n=Q([o,r]).pipe(m(()=>Ue(e)));return Q([r,t,n]).pipe(m(([{height:i},{offset:a,size:s},{x:c,y:p}])=>({offset:{x:a.x-c,y:a.y-p+i},size:s})))}function ba(e){return d(e,"message",t=>t.data)}function va(e){let t=new g;return t.subscribe(r=>e.postMessage(r)),t}function ln(e,t=new Worker(e)){let r=ba(t),o=va(t),n=new g;n.subscribe(o);let i=o.pipe(ee(),oe(!0));return n.pipe(ee(),$e(r.pipe(U(i))),le())}var ga=P("#__config"),Et=JSON.parse(ga.textContent);Et.base=`${new URL(Et.base,ve())}`;function we(){return Et}function G(e){return Et.features.includes(e)}function ge(e,t){return typeof t!="undefined"?Et.translations[e].replace("#",t.toString()):Et.translations[e]}function Te(e,t=document){return P(`[data-md-component=${e}]`,t)}function ie(e,t=document){return R(`[data-md-component=${e}]`,t)}function xa(e){let t=P(".md-typeset > :first-child",e);return d(t,"click",{once:!0}).pipe(m(()=>P(".md-typeset",e)),m(r=>({hash:__md_hash(r.innerHTML)})))}function mn(e){if(!G("announce.dismiss")||!e.childElementCount)return L;if(!e.hidden){let t=P(".md-typeset",e);__md_hash(t.innerHTML)===__md_get("__announce")&&(e.hidden=!0)}return H(()=>{let t=new g;return t.subscribe(({hash:r})=>{e.hidden=!0,__md_set("__announce",r)}),xa(e).pipe(y(r=>t.next(r)),_(()=>t.complete()),m(r=>F({ref:e},r)))})}function ya(e,{target$:t}){return t.pipe(m(r=>({hidden:r!==e})))}function fn(e,t){let r=new g;return r.subscribe(({hidden:o})=>{e.hidden=o}),ya(e,t).pipe(y(o=>r.next(o)),_(()=>r.complete()),m(o=>F({ref:e},o)))}function Ct(e,t){return t==="inline"?E("div",{class:"md-tooltip md-tooltip--inline",id:e,role:"tooltip"},E("div",{class:"md-tooltip__inner md-typeset"})):E("div",{class:"md-tooltip",id:e,role:"tooltip"},E("div",{class:"md-tooltip__inner md-typeset"}))}function un(e,t){if(t=t?`${t}_annotation_${e}`:void 0,t){let r=t?`#${t}`:void 0;return E("aside",{class:"md-annotation",tabIndex:0},Ct(t),E("a",{href:r,class:"md-annotation__index",tabIndex:-1},E("span",{"data-md-annotation-id":e})))}else return E("aside",{class:"md-annotation",tabIndex:0},Ct(t),E("span",{class:"md-annotation__index",tabIndex:-1},E("span",{"data-md-annotation-id":e})))}function dn(e){return E("button",{class:"md-clipboard md-icon",title:ge("clipboard.copy"),"data-clipboard-target":`#${e} > code`})}function Dr(e,t){let r=t&2,o=t&1,n=Object.keys(e.terms).filter(c=>!e.terms[c]).reduce((c,p)=>[...c,E("del",null,p)," "],[]).slice(0,-1),i=we(),a=new 
URL(e.location,i.base);G("search.highlight")&&a.searchParams.set("h",Object.entries(e.terms).filter(([,c])=>c).reduce((c,[p])=>`${c} ${p}`.trim(),""));let{tags:s}=we();return E("a",{href:`${a}`,class:"md-search-result__link",tabIndex:-1},E("article",{class:"md-search-result__article md-typeset","data-md-score":e.score.toFixed(2)},r>0&&E("div",{class:"md-search-result__icon md-icon"}),r>0&&E("h1",null,e.title),r<=0&&E("h2",null,e.title),o>0&&e.text.length>0&&e.text,e.tags&&e.tags.map(c=>{let p=s?c in s?`md-tag-icon md-tag--${s[c]}`:"md-tag-icon":"";return E("span",{class:`md-tag ${p}`},c)}),o>0&&n.length>0&&E("p",{class:"md-search-result__terms"},ge("search.result.term.missing"),": ",...n)))}function hn(e){let t=e[0].score,r=[...e],o=we(),n=r.findIndex(l=>!`${new URL(l.location,o.base)}`.includes("#")),[i]=r.splice(n,1),a=r.findIndex(l=>l.scoreDr(l,1)),...c.length?[E("details",{class:"md-search-result__more"},E("summary",{tabIndex:-1},E("div",null,c.length>0&&c.length===1?ge("search.result.more.one"):ge("search.result.more.other",c.length))),...c.map(l=>Dr(l,1)))]:[]];return E("li",{class:"md-search-result__item"},p)}function bn(e){return E("ul",{class:"md-source__facts"},Object.entries(e).map(([t,r])=>E("li",{class:`md-source__fact md-source__fact--${t}`},typeof r=="number"?ar(r):r)))}function Nr(e){let t=`tabbed-control tabbed-control--${e}`;return E("div",{class:t,hidden:!0},E("button",{class:"tabbed-button",tabIndex:-1,"aria-hidden":"true"}))}function vn(e){return E("div",{class:"md-typeset__scrollwrap"},E("div",{class:"md-typeset__table"},e))}function Ea(e){let t=we(),r=new URL(`../${e.version}/`,t.base);return E("li",{class:"md-version__item"},E("a",{href:`${r}`,class:"md-version__link"},e.title))}function gn(e,t){return e=e.filter(r=>{var o;return!((o=r.properties)!=null&&o.hidden)}),E("div",{class:"md-version"},E("button",{class:"md-version__current","aria-label":ge("select.version")},t.title),E("ul",{class:"md-version__list"},e.map(Ea)))}var wa=0;function Ta(e,t){document.body.append(e);let{width:r}=pe(e);e.style.setProperty("--md-tooltip-width",`${r}px`),e.remove();let o=sr(t),n=typeof o!="undefined"?et(o):$({x:0,y:0}),i=T(vt(t),Vo(t)).pipe(Y());return Q([i,n]).pipe(m(([a,s])=>{let{x:c,y:p}=Ue(t),l=pe(t),f=t.closest("table");return f&&t.parentElement&&(c+=f.offsetLeft+t.parentElement.offsetLeft,p+=f.offsetTop+t.parentElement.offsetTop),{active:a,offset:{x:c-s.x+l.width/2-r/2,y:p-s.y+l.height+8}}}))}function Ge(e){let t=e.title;if(!t.length)return L;let r=`__tooltip_${wa++}`,o=Ct(r,"inline"),n=P(".md-typeset",o);return n.innerHTML=t,H(()=>{let i=new g;return 
i.subscribe({next({offset:a}){o.style.setProperty("--md-tooltip-x",`${a.x}px`),o.style.setProperty("--md-tooltip-y",`${a.y}px`)},complete(){o.style.removeProperty("--md-tooltip-x"),o.style.removeProperty("--md-tooltip-y")}}),T(i.pipe(b(({active:a})=>a)),i.pipe(be(250),b(({active:a})=>!a))).subscribe({next({active:a}){a?(e.insertAdjacentElement("afterend",o),e.setAttribute("aria-describedby",r),e.removeAttribute("title")):(o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t))},complete(){o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t)}}),i.pipe(Me(16,de)).subscribe(({active:a})=>{o.classList.toggle("md-tooltip--active",a)}),i.pipe(_t(125,de),b(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:a})=>a)).subscribe({next(a){a?o.style.setProperty("--md-tooltip-0",`${-a}px`):o.style.removeProperty("--md-tooltip-0")},complete(){o.style.removeProperty("--md-tooltip-0")}}),Ta(o,e).pipe(y(a=>i.next(a)),_(()=>i.complete()),m(a=>F({ref:e},a)))}).pipe(ze(ae))}function Sa(e,t){let r=H(()=>Q([zo(e),et(t)])).pipe(m(([{x:o,y:n},i])=>{let{width:a,height:s}=pe(e);return{x:o-i.x+a/2,y:n-i.y+s/2}}));return vt(e).pipe(v(o=>r.pipe(m(n=>({active:o,offset:n})),ye(+!o||1/0))))}function xn(e,t,{target$:r}){let[o,n]=Array.from(e.children);return H(()=>{let i=new g,a=i.pipe(ee(),oe(!0));return i.subscribe({next({offset:s}){e.style.setProperty("--md-tooltip-x",`${s.x}px`),e.style.setProperty("--md-tooltip-y",`${s.y}px`)},complete(){e.style.removeProperty("--md-tooltip-x"),e.style.removeProperty("--md-tooltip-y")}}),yt(e).pipe(U(a)).subscribe(s=>{e.toggleAttribute("data-md-visible",s)}),T(i.pipe(b(({active:s})=>s)),i.pipe(be(250),b(({active:s})=>!s))).subscribe({next({active:s}){s?e.prepend(o):o.remove()},complete(){e.prepend(o)}}),i.pipe(Me(16,de)).subscribe(({active:s})=>{o.classList.toggle("md-tooltip--active",s)}),i.pipe(_t(125,de),b(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:s})=>s)).subscribe({next(s){s?e.style.setProperty("--md-tooltip-0",`${-s}px`):e.style.removeProperty("--md-tooltip-0")},complete(){e.style.removeProperty("--md-tooltip-0")}}),d(n,"click").pipe(U(a),b(s=>!(s.metaKey||s.ctrlKey))).subscribe(s=>{s.stopPropagation(),s.preventDefault()}),d(n,"mousedown").pipe(U(a),ne(i)).subscribe(([s,{active:c}])=>{var p;if(s.button!==0||s.metaKey||s.ctrlKey)s.preventDefault();else if(c){s.preventDefault();let l=e.parentElement.closest(".md-annotation");l instanceof HTMLElement?l.focus():(p=Re())==null||p.blur()}}),r.pipe(U(a),b(s=>s===o),Ye(125)).subscribe(()=>e.focus()),Sa(e,t).pipe(y(s=>i.next(s)),_(()=>i.complete()),m(s=>F({ref:e},s)))})}function Oa(e){return e.tagName==="CODE"?R(".c, .c1, .cm",e):[e]}function Ma(e){let t=[];for(let r of Oa(e)){let o=[],n=document.createNodeIterator(r,NodeFilter.SHOW_TEXT);for(let i=n.nextNode();i;i=n.nextNode())o.push(i);for(let i of o){let a;for(;a=/(\(\d+\))(!)?/.exec(i.textContent);){let[,s,c]=a;if(typeof c=="undefined"){let p=i.splitText(a.index);i=p.splitText(s.length),t.push(p)}else{i.textContent=s,t.push(i);break}}}}return t}function yn(e,t){t.append(...Array.from(e.childNodes))}function lr(e,t,{target$:r,print$:o}){let n=t.closest("[id]"),i=n==null?void 0:n.id,a=new Map;for(let s of Ma(t)){let[,c]=s.textContent.match(/\((\d+)\)/);me(`:scope > li:nth-child(${c})`,e)&&(a.set(c,un(c,i)),s.replaceWith(a.get(c)))}return a.size===0?L:H(()=>{let s=new g,c=s.pipe(ee(),oe(!0)),p=[];for(let[l,f]of a)p.push([P(".md-typeset",f),P(`:scope > li:nth-child(${l})`,e)]);return 
o.pipe(U(c)).subscribe(l=>{e.hidden=!l,e.classList.toggle("md-annotation-list",l);for(let[f,u]of p)l?yn(f,u):yn(u,f)}),T(...[...a].map(([,l])=>xn(l,t,{target$:r}))).pipe(_(()=>s.complete()),le())})}function En(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return En(t)}}function wn(e,t){return H(()=>{let r=En(e);return typeof r!="undefined"?lr(r,e,t):L})}var Tn=jt(zr());var La=0;function Sn(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return Sn(t)}}function _a(e){return Ee(e).pipe(m(({width:t})=>({scrollable:xt(e).width>t})),X("scrollable"))}function On(e,t){let{matches:r}=matchMedia("(hover)"),o=H(()=>{let n=new g,i=n.pipe($r(1));n.subscribe(({scrollable:c})=>{c&&r?e.setAttribute("tabindex","0"):e.removeAttribute("tabindex")});let a=[];if(Tn.default.isSupported()&&(e.closest(".copy")||G("content.code.copy")&&!e.closest(".no-copy"))){let c=e.closest("pre");c.id=`__code_${La++}`;let p=dn(c.id);c.insertBefore(p,e),G("content.tooltips")&&a.push(Ge(p))}let s=e.closest(".highlight");if(s instanceof HTMLElement){let c=Sn(s);if(typeof c!="undefined"&&(s.classList.contains("annotate")||G("content.code.annotate"))){let p=lr(c,e,t);a.push(Ee(s).pipe(U(i),m(({width:l,height:f})=>l&&f),Y(),v(l=>l?p:L)))}}return _a(e).pipe(y(c=>n.next(c)),_(()=>n.complete()),m(c=>F({ref:e},c)),$e(...a))});return G("content.lazy")?yt(e).pipe(b(n=>n),ye(1),v(()=>o)):o}function Aa(e,{target$:t,print$:r}){let o=!0;return T(t.pipe(m(n=>n.closest("details:not([open])")),b(n=>e===n),m(()=>({action:"open",reveal:!0}))),r.pipe(b(n=>n||!o),y(()=>o=e.open),m(n=>({action:n?"open":"close"}))))}function Mn(e,t){return H(()=>{let r=new g;return r.subscribe(({action:o,reveal:n})=>{e.toggleAttribute("open",o==="open"),n&&e.scrollIntoView()}),Aa(e,t).pipe(y(o=>r.next(o)),_(()=>r.complete()),m(o=>F({ref:e},o)))})}var Ln=".node circle,.node ellipse,.node path,.node polygon,.node rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}marker{fill:var(--md-mermaid-edge-color)!important}.edgeLabel .label rect{fill:#0000}.label{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.label foreignObject{line-height:normal;overflow:visible}.label div .edgeLabel{color:var(--md-mermaid-label-fg-color)}.edgeLabel,.edgeLabel rect,.label div .edgeLabel{background-color:var(--md-mermaid-label-bg-color)}.edgeLabel,.edgeLabel rect{fill:var(--md-mermaid-label-bg-color);color:var(--md-mermaid-edge-color)}.edgePath .path,.flowchart-link{stroke:var(--md-mermaid-edge-color);stroke-width:.05rem}.edgePath .arrowheadPath{fill:var(--md-mermaid-edge-color);stroke:none}.cluster rect{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}.cluster span{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}g #flowchart-circleEnd,g #flowchart-circleStart,g #flowchart-crossEnd,g #flowchart-crossStart,g #flowchart-pointEnd,g #flowchart-pointStart{stroke:none}g.classGroup line,g.classGroup rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.classGroup text{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.classLabel .box{fill:var(--md-mermaid-label-bg-color);background-color:var(--md-mermaid-label-bg-color);opacity:1}.classLabel .label{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.node 
.divider{stroke:var(--md-mermaid-node-fg-color)}.relation{stroke:var(--md-mermaid-edge-color)}.cardinality{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.cardinality text{fill:inherit!important}defs #classDiagram-compositionEnd,defs #classDiagram-compositionStart,defs #classDiagram-dependencyEnd,defs #classDiagram-dependencyStart,defs #classDiagram-extensionEnd,defs #classDiagram-extensionStart{fill:var(--md-mermaid-edge-color)!important;stroke:var(--md-mermaid-edge-color)!important}defs #classDiagram-aggregationEnd,defs #classDiagram-aggregationStart{fill:var(--md-mermaid-label-bg-color)!important;stroke:var(--md-mermaid-edge-color)!important}g.stateGroup rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.stateGroup .state-title{fill:var(--md-mermaid-label-fg-color)!important;font-family:var(--md-mermaid-font-family)}g.stateGroup .composit{fill:var(--md-mermaid-label-bg-color)}.nodeLabel,.nodeLabel p{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.node circle.state-end,.node circle.state-start,.start-state{fill:var(--md-mermaid-edge-color);stroke:none}.end-state-inner,.end-state-outer{fill:var(--md-mermaid-edge-color)}.end-state-inner,.node circle.state-end{stroke:var(--md-mermaid-label-bg-color)}.transition{stroke:var(--md-mermaid-edge-color)}[id^=state-fork] rect,[id^=state-join] rect{fill:var(--md-mermaid-edge-color)!important;stroke:none!important}.statediagram-cluster.statediagram-cluster .inner{fill:var(--md-default-bg-color)}.statediagram-cluster rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.statediagram-state rect.divider{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}defs #statediagram-barbEnd{stroke:var(--md-mermaid-edge-color)}.attributeBoxEven,.attributeBoxOdd{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityBox{fill:var(--md-mermaid-label-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityLabel{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.relationshipLabelBox{fill:var(--md-mermaid-label-bg-color);fill-opacity:1;background-color:var(--md-mermaid-label-bg-color);opacity:1}.relationshipLabel{fill:var(--md-mermaid-label-fg-color)}.relationshipLine{stroke:var(--md-mermaid-edge-color)}defs #ONE_OR_MORE_END *,defs #ONE_OR_MORE_START *,defs #ONLY_ONE_END *,defs #ONLY_ONE_START *,defs #ZERO_OR_MORE_END *,defs #ZERO_OR_MORE_START *,defs #ZERO_OR_ONE_END *,defs #ZERO_OR_ONE_START *{stroke:var(--md-mermaid-edge-color)!important}defs #ZERO_OR_MORE_END circle,defs #ZERO_OR_MORE_START circle{fill:var(--md-mermaid-label-bg-color)}.actor{fill:var(--md-mermaid-sequence-actor-bg-color);stroke:var(--md-mermaid-sequence-actor-border-color)}text.actor>tspan{fill:var(--md-mermaid-sequence-actor-fg-color);font-family:var(--md-mermaid-font-family)}line{stroke:var(--md-mermaid-sequence-actor-line-color)}.actor-man circle,.actor-man 
line{fill:var(--md-mermaid-sequence-actorman-bg-color);stroke:var(--md-mermaid-sequence-actorman-line-color)}.messageLine0,.messageLine1{stroke:var(--md-mermaid-sequence-message-line-color)}.note{fill:var(--md-mermaid-sequence-note-bg-color);stroke:var(--md-mermaid-sequence-note-border-color)}.loopText,.loopText>tspan,.messageText,.noteText>tspan{stroke:none;font-family:var(--md-mermaid-font-family)!important}.messageText{fill:var(--md-mermaid-sequence-message-fg-color)}.loopText,.loopText>tspan{fill:var(--md-mermaid-sequence-loop-fg-color)}.noteText>tspan{fill:var(--md-mermaid-sequence-note-fg-color)}#arrowhead path{fill:var(--md-mermaid-sequence-message-line-color);stroke:none}.loopLine{fill:var(--md-mermaid-sequence-loop-bg-color);stroke:var(--md-mermaid-sequence-loop-border-color)}.labelBox{fill:var(--md-mermaid-sequence-label-bg-color);stroke:none}.labelText,.labelText>span{fill:var(--md-mermaid-sequence-label-fg-color);font-family:var(--md-mermaid-font-family)}.sequenceNumber{fill:var(--md-mermaid-sequence-number-fg-color)}rect.rect{fill:var(--md-mermaid-sequence-box-bg-color);stroke:none}rect.rect+text.text{fill:var(--md-mermaid-sequence-box-fg-color)}defs #sequencenumber{fill:var(--md-mermaid-sequence-number-bg-color)!important}";var qr,ka=0;function Ha(){return typeof mermaid=="undefined"||mermaid instanceof Element?gt("https://unpkg.com/mermaid@10.7.0/dist/mermaid.min.js"):$(void 0)}function _n(e){return e.classList.remove("mermaid"),qr||(qr=Ha().pipe(y(()=>mermaid.initialize({startOnLoad:!1,themeCSS:Ln,sequence:{actorFontSize:"16px",messageFontSize:"16px",noteFontSize:"16px"}})),m(()=>{}),B(1))),qr.subscribe(()=>ro(this,null,function*(){e.classList.add("mermaid");let t=`__mermaid_${ka++}`,r=E("div",{class:"mermaid"}),o=e.textContent,{svg:n,fn:i}=yield mermaid.render(t,o),a=r.attachShadow({mode:"closed"});a.innerHTML=n,e.replaceWith(r),i==null||i(a)})),qr.pipe(m(()=>({ref:e})))}var An=E("table");function Cn(e){return e.replaceWith(An),An.replaceWith(vn(e)),$({ref:e})}function $a(e){let t=e.find(r=>r.checked)||e[0];return T(...e.map(r=>d(r,"change").pipe(m(()=>P(`label[for="${r.id}"]`))))).pipe(q(P(`label[for="${t.id}"]`)),m(r=>({active:r})))}function kn(e,{viewport$:t,target$:r}){let o=P(".tabbed-labels",e),n=R(":scope > input",e),i=Nr("prev");e.append(i);let a=Nr("next");return e.append(a),H(()=>{let s=new g,c=s.pipe(ee(),oe(!0));Q([s,Ee(e)]).pipe(U(c),Me(1,de)).subscribe({next([{active:p},l]){let f=Ue(p),{width:u}=pe(p);e.style.setProperty("--md-indicator-x",`${f.x}px`),e.style.setProperty("--md-indicator-width",`${u}px`);let h=ir(o);(f.xh.x+l.width)&&o.scrollTo({left:Math.max(0,f.x-16),behavior:"smooth"})},complete(){e.style.removeProperty("--md-indicator-x"),e.style.removeProperty("--md-indicator-width")}}),Q([et(o),Ee(o)]).pipe(U(c)).subscribe(([p,l])=>{let f=xt(o);i.hidden=p.x<16,a.hidden=p.x>f.width-l.width-16}),T(d(i,"click").pipe(m(()=>-1)),d(a,"click").pipe(m(()=>1))).pipe(U(c)).subscribe(p=>{let{width:l}=pe(o);o.scrollBy({left:l*p,behavior:"smooth"})}),r.pipe(U(c),b(p=>n.includes(p))).subscribe(p=>p.click()),o.classList.add("tabbed-labels--linked");for(let p of n){let l=P(`label[for="${p.id}"]`);l.replaceChildren(E("a",{href:`#${l.htmlFor}`,tabIndex:-1},...Array.from(l.childNodes))),d(l.firstElementChild,"click").pipe(U(c),b(f=>!(f.metaKey||f.ctrlKey)),y(f=>{f.preventDefault(),f.stopPropagation()})).subscribe(()=>{history.replaceState({},"",`#${l.htmlFor}`),l.click()})}return G("content.tabs.link")&&s.pipe(Le(1),ne(t)).subscribe(([{active:p},{offset:l}])=>{let 
f=p.innerText.trim();if(p.hasAttribute("data-md-switching"))p.removeAttribute("data-md-switching");else{let u=e.offsetTop-l.y;for(let w of R("[data-tabs]"))for(let A of R(":scope > input",w)){let Z=P(`label[for="${A.id}"]`);if(Z!==p&&Z.innerText.trim()===f){Z.setAttribute("data-md-switching",""),A.click();break}}window.scrollTo({top:e.offsetTop-u});let h=__md_get("__tabs")||[];__md_set("__tabs",[...new Set([f,...h])])}}),s.pipe(U(c)).subscribe(()=>{for(let p of R("audio, video",e))p.pause()}),$a(n).pipe(y(p=>s.next(p)),_(()=>s.complete()),m(p=>F({ref:e},p)))}).pipe(ze(ae))}function Hn(e,{viewport$:t,target$:r,print$:o}){return T(...R(".annotate:not(.highlight)",e).map(n=>wn(n,{target$:r,print$:o})),...R("pre:not(.mermaid) > code",e).map(n=>On(n,{target$:r,print$:o})),...R("pre.mermaid",e).map(n=>_n(n)),...R("table:not([class])",e).map(n=>Cn(n)),...R("details",e).map(n=>Mn(n,{target$:r,print$:o})),...R("[data-tabs]",e).map(n=>kn(n,{viewport$:t,target$:r})),...R("[title]",e).filter(()=>G("content.tooltips")).map(n=>Ge(n)))}function Ra(e,{alert$:t}){return t.pipe(v(r=>T($(!0),$(!1).pipe(Ye(2e3))).pipe(m(o=>({message:r,active:o})))))}function $n(e,t){let r=P(".md-typeset",e);return H(()=>{let o=new g;return o.subscribe(({message:n,active:i})=>{e.classList.toggle("md-dialog--active",i),r.textContent=n}),Ra(e,t).pipe(y(n=>o.next(n)),_(()=>o.complete()),m(n=>F({ref:e},n)))})}function Pa({viewport$:e}){if(!G("header.autohide"))return $(!1);let t=e.pipe(m(({offset:{y:n}})=>n),Ke(2,1),m(([n,i])=>[nMath.abs(i-n.y)>100),m(([,[n]])=>n),Y()),o=We("search");return Q([e,o]).pipe(m(([{offset:n},i])=>n.y>400&&!i),Y(),v(n=>n?r:$(!1)),q(!1))}function Rn(e,t){return H(()=>Q([Ee(e),Pa(t)])).pipe(m(([{height:r},o])=>({height:r,hidden:o})),Y((r,o)=>r.height===o.height&&r.hidden===o.hidden),B(1))}function Pn(e,{header$:t,main$:r}){return H(()=>{let o=new g,n=o.pipe(ee(),oe(!0));o.pipe(X("active"),je(t)).subscribe(([{active:a},{hidden:s}])=>{e.classList.toggle("md-header--shadow",a&&!s),e.hidden=s});let i=fe(R("[title]",e)).pipe(b(()=>G("content.tooltips")),re(a=>Ge(a)));return r.subscribe(o),t.pipe(U(n),m(a=>F({ref:e},a)),$e(i.pipe(U(n))))})}function Ia(e,{viewport$:t,header$:r}){return pr(e,{viewport$:t,header$:r}).pipe(m(({offset:{y:o}})=>{let{height:n}=pe(e);return{active:o>=n}}),X("active"))}function In(e,t){return H(()=>{let r=new g;r.subscribe({next({active:n}){e.classList.toggle("md-header__title--active",n)},complete(){e.classList.remove("md-header__title--active")}});let o=me(".md-content h1");return typeof o=="undefined"?L:Ia(o,t).pipe(y(n=>r.next(n)),_(()=>r.complete()),m(n=>F({ref:e},n)))})}function Fn(e,{viewport$:t,header$:r}){let o=r.pipe(m(({height:i})=>i),Y()),n=o.pipe(v(()=>Ee(e).pipe(m(({height:i})=>({top:e.offsetTop,bottom:e.offsetTop+i})),X("bottom"))));return Q([o,n,t]).pipe(m(([i,{top:a,bottom:s},{offset:{y:c},size:{height:p}}])=>(p=Math.max(0,p-Math.max(0,a-c,i)-Math.max(0,p+c-s)),{offset:a-i,height:p,active:a-i<=c})),Y((i,a)=>i.offset===a.offset&&i.height===a.height&&i.active===a.active))}function Fa(e){let t=__md_get("__palette")||{index:e.findIndex(o=>matchMedia(o.getAttribute("data-md-color-media")).matches)},r=Math.max(0,Math.min(t.index,e.length-1));return $(...e).pipe(re(o=>d(o,"change").pipe(m(()=>o))),q(e[r]),m(o=>({index:e.indexOf(o),color:{media:o.getAttribute("data-md-color-media"),scheme:o.getAttribute("data-md-color-scheme"),primary:o.getAttribute("data-md-color-primary"),accent:o.getAttribute("data-md-color-accent")}})),B(1))}function jn(e){let 
t=R("input",e),r=E("meta",{name:"theme-color"});document.head.appendChild(r);let o=E("meta",{name:"color-scheme"});document.head.appendChild(o);let n=At("(prefers-color-scheme: light)");return H(()=>{let i=new g;return i.subscribe(a=>{if(document.body.setAttribute("data-md-color-switching",""),a.color.media==="(prefers-color-scheme)"){let s=matchMedia("(prefers-color-scheme: light)"),c=document.querySelector(s.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");a.color.scheme=c.getAttribute("data-md-color-scheme"),a.color.primary=c.getAttribute("data-md-color-primary"),a.color.accent=c.getAttribute("data-md-color-accent")}for(let[s,c]of Object.entries(a.color))document.body.setAttribute(`data-md-color-${s}`,c);for(let s=0;sa.key==="Enter"),ne(i,(a,s)=>s)).subscribe(({index:a})=>{a=(a+1)%t.length,t[a].click(),t[a].focus()}),i.pipe(m(()=>{let a=Te("header"),s=window.getComputedStyle(a);return o.content=s.colorScheme,s.backgroundColor.match(/\d+/g).map(c=>(+c).toString(16).padStart(2,"0")).join("")})).subscribe(a=>r.content=`#${a}`),i.pipe(Oe(ae)).subscribe(()=>{document.body.removeAttribute("data-md-color-switching")}),Fa(t).pipe(U(n.pipe(Le(1))),at(),y(a=>i.next(a)),_(()=>i.complete()),m(a=>F({ref:e},a)))})}function Un(e,{progress$:t}){return H(()=>{let r=new g;return r.subscribe(({value:o})=>{e.style.setProperty("--md-progress-value",`${o}`)}),t.pipe(y(o=>r.next({value:o})),_(()=>r.complete()),m(o=>({ref:e,value:o})))})}var Kr=jt(zr());function ja(e){e.setAttribute("data-md-copying","");let t=e.closest("[data-copy]"),r=t?t.getAttribute("data-copy"):e.innerText;return e.removeAttribute("data-md-copying"),r.trimEnd()}function Wn({alert$:e}){Kr.default.isSupported()&&new j(t=>{new Kr.default("[data-clipboard-target], [data-clipboard-text]",{text:r=>r.getAttribute("data-clipboard-text")||ja(P(r.getAttribute("data-clipboard-target")))}).on("success",r=>t.next(r))}).pipe(y(t=>{t.trigger.focus()}),m(()=>ge("clipboard.copied"))).subscribe(e)}function Dn(e,t){return e.protocol=t.protocol,e.hostname=t.hostname,e}function Ua(e,t){let r=new Map;for(let o of R("url",e)){let n=P("loc",o),i=[Dn(new URL(n.textContent),t)];r.set(`${i[0]}`,i);for(let a of R("[rel=alternate]",o)){let s=a.getAttribute("href");s!=null&&i.push(Dn(new URL(s),t))}}return r}function mr(e){return on(new URL("sitemap.xml",e)).pipe(m(t=>Ua(t,new URL(e))),he(()=>$(new Map)))}function Wa(e,t){if(!(e.target instanceof Element))return L;let r=e.target.closest("a");if(r===null)return L;if(r.target||e.metaKey||e.ctrlKey)return L;let o=new URL(r.href);return o.search=o.hash="",t.has(`${o}`)?(e.preventDefault(),$(new URL(r.href))):L}function Nn(e){let t=new Map;for(let r of R(":scope > *",e.head))t.set(r.outerHTML,r);return t}function Vn(e){for(let t of R("[href], [src]",e))for(let r of["href","src"]){let o=t.getAttribute(r);if(o&&!/^(?:[a-z]+:)?\/\//i.test(o)){t[r]=t[r];break}}return $(e)}function Da(e){for(let o of["[data-md-component=announce]","[data-md-component=container]","[data-md-component=header-topic]","[data-md-component=outdated]","[data-md-component=logo]","[data-md-component=skip]",...G("navigation.tabs.sticky")?["[data-md-component=tabs]"]:[]]){let n=me(o),i=me(o,e);typeof n!="undefined"&&typeof i!="undefined"&&n.replaceWith(i)}let t=Nn(document);for(let[o,n]of Nn(e))t.has(o)?t.delete(o):document.head.appendChild(n);for(let o of t.values()){let n=o.getAttribute("name");n!=="theme-color"&&n!=="color-scheme"&&o.remove()}let r=Te("container");return 
Fe(R("script",r)).pipe(v(o=>{let n=e.createElement("script");if(o.src){for(let i of o.getAttributeNames())n.setAttribute(i,o.getAttribute(i));return o.replaceWith(n),new j(i=>{n.onload=()=>i.complete()})}else return n.textContent=o.textContent,o.replaceWith(n),L}),ee(),oe(document))}function zn({location$:e,viewport$:t,progress$:r}){let o=we();if(location.protocol==="file:")return L;let n=mr(o.base);$(document).subscribe(Vn);let i=d(document.body,"click").pipe(je(n),v(([c,p])=>Wa(c,p)),le()),a=d(window,"popstate").pipe(m(ve),le());i.pipe(ne(t)).subscribe(([c,{offset:p}])=>{history.replaceState(p,""),history.pushState(null,"",c)}),T(i,a).subscribe(e);let s=e.pipe(X("pathname"),v(c=>rn(c,{progress$:r}).pipe(he(()=>(st(c,!0),L)))),v(Vn),v(Da),le());return T(s.pipe(ne(e,(c,p)=>p)),e.pipe(X("pathname"),v(()=>e),X("hash")),e.pipe(Y((c,p)=>c.pathname===p.pathname&&c.hash===p.hash),v(()=>i),y(()=>history.back()))).subscribe(c=>{var p,l;history.state!==null||!c.hash?window.scrollTo(0,(l=(p=history.state)==null?void 0:p.y)!=null?l:0):(history.scrollRestoration="auto",Zo(c.hash),history.scrollRestoration="manual")}),e.subscribe(()=>{history.scrollRestoration="manual"}),d(window,"beforeunload").subscribe(()=>{history.scrollRestoration="auto"}),t.pipe(X("offset"),be(100)).subscribe(({offset:c})=>{history.replaceState(c,"")}),s}var Qn=jt(Kn());function Yn(e){let t=e.separator.split("|").map(n=>n.replace(/(\(\?[!=<][^)]+\))/g,"").length===0?"\uFFFD":n).join("|"),r=new RegExp(t,"img"),o=(n,i,a)=>`${i}<mark data-md-highlight>${a}</mark>`;return n=>{n=n.replace(/[\s*+\-:~^]+/g," ").trim();let i=new RegExp(`(^|${e.separator}|)(${n.replace(/[|\\{}()[\]^$+*?.-]/g,"\\$&").replace(r,"|")})`,"img");return a=>(0,Qn.default)(a).replace(i,o).replace(/<\/mark>(\s+)<mark[^>]*>/img,"$1")}}function Ht(e){return e.type===1}function fr(e){return e.type===3}function Bn(e,t){let r=ln(e);return T($(location.protocol!=="file:"),We("search")).pipe(He(o=>o),v(()=>t)).subscribe(({config:o,docs:n})=>r.next({type:0,data:{config:o,docs:n,options:{suggest:G("search.suggest")}}})),r}function Gn({document$:e}){let t=we(),r=De(new URL("../versions.json",t.base)).pipe(he(()=>L)),o=r.pipe(m(n=>{let[,i]=t.base.match(/([^/]+)\/?$/);return n.find(({version:a,aliases:s})=>a===i||s.includes(i))||n[0]}));r.pipe(m(n=>new Map(n.map(i=>[`${new URL(`../${i.version}/`,t.base)}`,i]))),v(n=>d(document.body,"click").pipe(b(i=>!i.metaKey&&!i.ctrlKey),ne(o),v(([i,a])=>{if(i.target instanceof Element){let s=i.target.closest("a");if(s&&!s.target&&n.has(s.href)){let c=s.href;return!i.target.closest(".md-version")&&n.get(c)===a?L:(i.preventDefault(),$(c))}}return L}),v(i=>{let{version:a}=n.get(i);return mr(new URL(i)).pipe(m(s=>{let p=ve().href.replace(t.base,"");return s.has(p.split("#")[0])?new URL(`../${a}/${p}`,t.base):new URL(i)}))})))).subscribe(n=>st(n,!0)),Q([r,o]).subscribe(([n,i])=>{P(".md-header__topic").appendChild(gn(n,i))}),e.pipe(v(()=>o)).subscribe(n=>{var a;let i=__md_get("__outdated",sessionStorage);if(i===null){i=!0;let s=((a=t.version)==null?void 0:a.default)||"latest";Array.isArray(s)||(s=[s]);e:for(let c of s)for(let p of n.aliases.concat(n.version))if(new RegExp(c,"i").test(p)){i=!1;break e}__md_set("__outdated",i,sessionStorage)}if(i)for(let s of ie("outdated"))s.hidden=!1})}function Ka(e,{worker$:t}){let{searchParams:r}=ve();r.has("q")&&(Be("search",!0),e.value=r.get("q"),e.focus(),We("search").pipe(He(i=>!i)).subscribe(()=>{let i=ve();i.searchParams.delete("q"),history.replaceState({},"",`${i}`)}));let
o=vt(e),n=T(t.pipe(He(Ht)),d(e,"keyup"),o).pipe(m(()=>e.value),Y());return Q([n,o]).pipe(m(([i,a])=>({value:i,focus:a})),B(1))}function Jn(e,{worker$:t}){let r=new g,o=r.pipe(ee(),oe(!0));Q([t.pipe(He(Ht)),r],(i,a)=>a).pipe(X("value")).subscribe(({value:i})=>t.next({type:2,data:i})),r.pipe(X("focus")).subscribe(({focus:i})=>{i&&Be("search",i)}),d(e.form,"reset").pipe(U(o)).subscribe(()=>e.focus());let n=P("header [for=__search]");return d(n,"click").subscribe(()=>e.focus()),Ka(e,{worker$:t}).pipe(y(i=>r.next(i)),_(()=>r.complete()),m(i=>F({ref:e},i)),B(1))}function Xn(e,{worker$:t,query$:r}){let o=new g,n=Yo(e.parentElement).pipe(b(Boolean)),i=e.parentElement,a=P(":scope > :first-child",e),s=P(":scope > :last-child",e);We("search").subscribe(l=>s.setAttribute("role",l?"list":"presentation")),o.pipe(ne(r),Ir(t.pipe(He(Ht)))).subscribe(([{items:l},{value:f}])=>{switch(l.length){case 0:a.textContent=f.length?ge("search.result.none"):ge("search.result.placeholder");break;case 1:a.textContent=ge("search.result.one");break;default:let u=ar(l.length);a.textContent=ge("search.result.other",u)}});let c=o.pipe(y(()=>s.innerHTML=""),v(({items:l})=>T($(...l.slice(0,10)),$(...l.slice(10)).pipe(Ke(4),jr(n),v(([f])=>f)))),m(hn),le());return c.subscribe(l=>s.appendChild(l)),c.pipe(re(l=>{let f=me("details",l);return typeof f=="undefined"?L:d(f,"toggle").pipe(U(o),m(()=>f))})).subscribe(l=>{l.open===!1&&l.offsetTop<=i.scrollTop&&i.scrollTo({top:l.offsetTop})}),t.pipe(b(fr),m(({data:l})=>l)).pipe(y(l=>o.next(l)),_(()=>o.complete()),m(l=>F({ref:e},l)))}function Qa(e,{query$:t}){return t.pipe(m(({value:r})=>{let o=ve();return o.hash="",r=r.replace(/\s+/g,"+").replace(/&/g,"%26").replace(/=/g,"%3D"),o.search=`q=${r}`,{url:o}}))}function Zn(e,t){let r=new g,o=r.pipe(ee(),oe(!0));return r.subscribe(({url:n})=>{e.setAttribute("data-clipboard-text",e.href),e.href=`${n}`}),d(e,"click").pipe(U(o)).subscribe(n=>n.preventDefault()),Qa(e,t).pipe(y(n=>r.next(n)),_(()=>r.complete()),m(n=>F({ref:e},n)))}function ei(e,{worker$:t,keyboard$:r}){let o=new g,n=Te("search-query"),i=T(d(n,"keydown"),d(n,"focus")).pipe(Oe(ae),m(()=>n.value),Y());return o.pipe(je(i),m(([{suggest:s},c])=>{let p=c.split(/([\s-]+)/);if(s!=null&&s.length&&p[p.length-1]){let l=s[s.length-1];l.startsWith(p[p.length-1])&&(p[p.length-1]=l)}else p.length=0;return p})).subscribe(s=>e.innerHTML=s.join("").replace(/\s/g," ")),r.pipe(b(({mode:s})=>s==="search")).subscribe(s=>{switch(s.type){case"ArrowRight":e.innerText.length&&n.selectionStart===n.value.length&&(n.value=e.innerText);break}}),t.pipe(b(fr),m(({data:s})=>s)).pipe(y(s=>o.next(s)),_(()=>o.complete()),m(()=>({ref:e})))}function ti(e,{index$:t,keyboard$:r}){let o=we();try{let n=Bn(o.search,t),i=Te("search-query",e),a=Te("search-result",e);d(e,"click").pipe(b(({target:c})=>c instanceof Element&&!!c.closest("a"))).subscribe(()=>Be("search",!1)),r.pipe(b(({mode:c})=>c==="search")).subscribe(c=>{let p=Re();switch(c.type){case"Enter":if(p===i){let l=new Map;for(let f of R(":first-child [href]",a)){let u=f.firstElementChild;l.set(f,parseFloat(u.getAttribute("data-md-score")))}if(l.size){let[[f]]=[...l].sort(([,u],[,h])=>h-u);f.click()}c.claim()}break;case"Escape":case"Tab":Be("search",!1),i.blur();break;case"ArrowUp":case"ArrowDown":if(typeof p=="undefined")i.focus();else{let l=[i,...R(":not(details) > [href], summary, details[open] 
[href]",a)],f=Math.max(0,(Math.max(0,l.indexOf(p))+l.length+(c.type==="ArrowUp"?-1:1))%l.length);l[f].focus()}c.claim();break;default:i!==Re()&&i.focus()}}),r.pipe(b(({mode:c})=>c==="global")).subscribe(c=>{switch(c.type){case"f":case"s":case"/":i.focus(),i.select(),c.claim();break}});let s=Jn(i,{worker$:n});return T(s,Xn(a,{worker$:n,query$:s})).pipe($e(...ie("search-share",e).map(c=>Zn(c,{query$:s})),...ie("search-suggest",e).map(c=>ei(c,{worker$:n,keyboard$:r}))))}catch(n){return e.hidden=!0,qe}}function ri(e,{index$:t,location$:r}){return Q([t,r.pipe(q(ve()),b(o=>!!o.searchParams.get("h")))]).pipe(m(([o,n])=>Yn(o.config)(n.searchParams.get("h"))),m(o=>{var a;let n=new Map,i=document.createNodeIterator(e,NodeFilter.SHOW_TEXT);for(let s=i.nextNode();s;s=i.nextNode())if((a=s.parentElement)!=null&&a.offsetHeight){let c=s.textContent,p=o(c);p.length>c.length&&n.set(s,p)}for(let[s,c]of n){let{childNodes:p}=E("span",null,c);s.replaceWith(...Array.from(p))}return{ref:e,nodes:n}}))}function Ya(e,{viewport$:t,main$:r}){let o=e.closest(".md-grid"),n=o.offsetTop-o.parentElement.offsetTop;return Q([r,t]).pipe(m(([{offset:i,height:a},{offset:{y:s}}])=>(a=a+Math.min(n,Math.max(0,s-i))-n,{height:a,locked:s>=i+n})),Y((i,a)=>i.height===a.height&&i.locked===a.locked))}function Qr(e,o){var n=o,{header$:t}=n,r=to(n,["header$"]);let i=P(".md-sidebar__scrollwrap",e),{y:a}=Ue(i);return H(()=>{let s=new g,c=s.pipe(ee(),oe(!0)),p=s.pipe(Me(0,de));return p.pipe(ne(t)).subscribe({next([{height:l},{height:f}]){i.style.height=`${l-2*a}px`,e.style.top=`${f}px`},complete(){i.style.height="",e.style.top=""}}),p.pipe(He()).subscribe(()=>{for(let l of R(".md-nav__link--active[href]",e)){if(!l.clientHeight)continue;let f=l.closest(".md-sidebar__scrollwrap");if(typeof f!="undefined"){let u=l.offsetTop-f.offsetTop,{height:h}=pe(f);f.scrollTo({top:u-h/2})}}}),fe(R("label[tabindex]",e)).pipe(re(l=>d(l,"click").pipe(Oe(ae),m(()=>l),U(c)))).subscribe(l=>{let f=P(`[id="${l.htmlFor}"]`);P(`[aria-labelledby="${l.id}"]`).setAttribute("aria-expanded",`${f.checked}`)}),Ya(e,r).pipe(y(l=>s.next(l)),_(()=>s.complete()),m(l=>F({ref:e},l)))})}function oi(e,t){if(typeof t!="undefined"){let r=`https://api.github.com/repos/${e}/${t}`;return Lt(De(`${r}/releases/latest`).pipe(he(()=>L),m(o=>({version:o.tag_name})),Qe({})),De(r).pipe(he(()=>L),m(o=>({stars:o.stargazers_count,forks:o.forks_count})),Qe({}))).pipe(m(([o,n])=>F(F({},o),n)))}else{let r=`https://api.github.com/users/${e}`;return De(r).pipe(m(o=>({repositories:o.public_repos})),Qe({}))}}function ni(e,t){let r=`https://${e}/api/v4/projects/${encodeURIComponent(t)}`;return De(r).pipe(he(()=>L),m(({star_count:o,forks_count:n})=>({stars:o,forks:n})),Qe({}))}function ii(e){let t=e.match(/^.+github\.com\/([^/]+)\/?([^/]+)?/i);if(t){let[,r,o]=t;return oi(r,o)}if(t=e.match(/^.+?([^/]*gitlab[^/]+)\/(.+?)\/?$/i),t){let[,r,o]=t;return ni(r,o)}return L}var Ba;function Ga(e){return Ba||(Ba=H(()=>{let t=__md_get("__source",sessionStorage);if(t)return $(t);if(ie("consent").length){let o=__md_get("__consent");if(!(o&&o.github))return L}return ii(e.href).pipe(y(o=>__md_set("__source",o,sessionStorage)))}).pipe(he(()=>L),b(t=>Object.keys(t).length>0),m(t=>({facts:t})),B(1)))}function ai(e){let t=P(":scope > :last-child",e);return H(()=>{let r=new g;return r.subscribe(({facts:o})=>{t.appendChild(bn(o)),t.classList.add("md-source__repository--active")}),Ga(e).pipe(y(o=>r.next(o)),_(()=>r.complete()),m(o=>F({ref:e},o)))})}function Ja(e,{viewport$:t,header$:r}){return 
Ee(document.body).pipe(v(()=>pr(e,{header$:r,viewport$:t})),m(({offset:{y:o}})=>({hidden:o>=10})),X("hidden"))}function si(e,t){return H(()=>{let r=new g;return r.subscribe({next({hidden:o}){e.hidden=o},complete(){e.hidden=!1}}),(G("navigation.tabs.sticky")?$({hidden:!1}):Ja(e,t)).pipe(y(o=>r.next(o)),_(()=>r.complete()),m(o=>F({ref:e},o)))})}function Xa(e,{viewport$:t,header$:r}){let o=new Map,n=R(".md-nav__link",e);for(let s of n){let c=decodeURIComponent(s.hash.substring(1)),p=me(`[id="${c}"]`);typeof p!="undefined"&&o.set(s,p)}let i=r.pipe(X("height"),m(({height:s})=>{let c=Te("main"),p=P(":scope > :first-child",c);return s+.8*(p.offsetTop-c.offsetTop)}),le());return Ee(document.body).pipe(X("height"),v(s=>H(()=>{let c=[];return $([...o].reduce((p,[l,f])=>{for(;c.length&&o.get(c[c.length-1]).tagName>=f.tagName;)c.pop();let u=f.offsetTop;for(;!u&&f.parentElement;)f=f.parentElement,u=f.offsetTop;let h=f.offsetParent;for(;h;h=h.offsetParent)u+=h.offsetTop;return p.set([...c=[...c,l]].reverse(),u)},new Map))}).pipe(m(c=>new Map([...c].sort(([,p],[,l])=>p-l))),je(i),v(([c,p])=>t.pipe(Rr(([l,f],{offset:{y:u},size:h})=>{let w=u+h.height>=Math.floor(s.height);for(;f.length;){let[,A]=f[0];if(A-p<u||w)l=[...l,f.shift()];else break}for(;l.length;){let[,A]=l[l.length-1];if(A-p>=u&&!w)f=[l.pop(),...f];else break}return[l,f]},[[],[...c]]),Y((l,f)=>l[0]===f[0]&&l[1]===f[1])))))).pipe(m(([s,c])=>({prev:s.map(([p])=>p),next:c.map(([p])=>p)})),q({prev:[],next:[]}),Ke(2,1),m(([s,c])=>s.prev.length<c.prev.length?{prev:c.prev.slice(Math.max(0,s.prev.length-1),c.prev.length),next:[]}:{prev:c.prev.slice(-1),next:c.next.slice(0,c.next.length-s.next.length)}))}function ci(e,{viewport$:t,header$:r,main$:o,target$:n}){return H(()=>{let i=new g,a=i.pipe(ee(),oe(!0));if(i.subscribe(({prev:s,next:c})=>{for(let[p]of c)p.classList.remove("md-nav__link--passed"),p.classList.remove("md-nav__link--active");for(let[p,[l]]of s.entries())l.classList.add("md-nav__link--passed"),l.classList.toggle("md-nav__link--active",p===s.length-1)}),G("toc.follow")){let s=T(t.pipe(be(1),m(()=>{})),t.pipe(be(250),m(()=>"smooth")));i.pipe(b(({prev:c})=>c.length>0),je(o.pipe(Oe(ae))),ne(s)).subscribe(([[{prev:c}],p])=>{let[l]=c[c.length-1];if(l.offsetHeight){let f=sr(l);if(typeof f!="undefined"){let u=l.offsetTop-f.offsetTop,{height:h}=pe(f);f.scrollTo({top:u-h/2,behavior:p})}}})}return G("navigation.tracking")&&t.pipe(U(a),X("offset"),be(250),Le(1),U(n.pipe(Le(1))),at({delay:250}),ne(i)).subscribe(([,{prev:s}])=>{let c=ve(),p=s[s.length-1];if(p&&p.length){let[l]=p,{hash:f}=new URL(l.href);c.hash!==f&&(c.hash=f,history.replaceState({},"",`${c}`))}else c.hash="",history.replaceState({},"",`${c}`)}),Xa(e,{viewport$:t,header$:r}).pipe(y(s=>i.next(s)),_(()=>i.complete()),m(s=>F({ref:e},s)))})}function Za(e,{viewport$:t,main$:r,target$:o}){let n=t.pipe(m(({offset:{y:a}})=>a),Ke(2,1),m(([a,s])=>a>s&&s>0),Y()),i=r.pipe(m(({active:a})=>a));return Q([i,n]).pipe(m(([a,s])=>!(a&&s)),Y(),U(o.pipe(Le(1))),oe(!0),at({delay:250}),m(a=>({hidden:a})))}function pi(e,{viewport$:t,header$:r,main$:o,target$:n}){let i=new g,a=i.pipe(ee(),oe(!0));return i.subscribe({next({hidden:s}){e.hidden=s,s?(e.setAttribute("tabindex","-1"),e.blur()):e.removeAttribute("tabindex")},complete(){e.style.top="",e.hidden=!0,e.removeAttribute("tabindex")}}),r.pipe(U(a),X("height")).subscribe(({height:s})=>{e.style.top=`${s+16}px`}),d(e,"click").subscribe(s=>{s.preventDefault(),window.scrollTo({top:0})}),Za(e,{viewport$:t,main$:o,target$:n}).pipe(y(s=>i.next(s)),_(()=>i.complete()),m(s=>F({ref:e},s)))}function li({document$:e}){e.pipe(v(()=>R(".md-ellipsis")),re(t=>yt(t).pipe(U(e.pipe(Le(1))),b(r=>r),m(()=>t),ye(1))),b(t=>t.offsetWidth<t.scrollWidth),re(t=>{let r=t.innerText,o=t.closest("a")||t;return
o.title=r,Ge(o).pipe(U(e.pipe(Le(1))),_(()=>o.removeAttribute("title")))})).subscribe(),e.pipe(v(()=>R(".md-status")),re(t=>Ge(t))).subscribe()}function mi({document$:e,tablet$:t}){e.pipe(v(()=>R(".md-toggle--indeterminate")),y(r=>{r.indeterminate=!0,r.checked=!1}),re(r=>d(r,"change").pipe(Fr(()=>r.classList.contains("md-toggle--indeterminate")),m(()=>r))),ne(t)).subscribe(([r,o])=>{r.classList.remove("md-toggle--indeterminate"),o&&(r.checked=!1)})}function es(){return/(iPad|iPhone|iPod)/.test(navigator.userAgent)}function fi({document$:e}){e.pipe(v(()=>R("[data-md-scrollfix]")),y(t=>t.removeAttribute("data-md-scrollfix")),b(es),re(t=>d(t,"touchstart").pipe(m(()=>t)))).subscribe(t=>{let r=t.scrollTop;r===0?t.scrollTop=1:r+t.offsetHeight===t.scrollHeight&&(t.scrollTop=r-1)})}function ui({viewport$:e,tablet$:t}){Q([We("search"),t]).pipe(m(([r,o])=>r&&!o),v(r=>$(r).pipe(Ye(r?400:100))),ne(e)).subscribe(([r,{offset:{y:o}}])=>{if(r)document.body.setAttribute("data-md-scrolllock",""),document.body.style.top=`-${o}px`;else{let n=-1*parseInt(document.body.style.top,10);document.body.removeAttribute("data-md-scrolllock"),document.body.style.top="",n&&window.scrollTo(0,n)}})}Object.entries||(Object.entries=function(e){let t=[];for(let r of Object.keys(e))t.push([r,e[r]]);return t});Object.values||(Object.values=function(e){let t=[];for(let r of Object.keys(e))t.push(e[r]);return t});typeof Element!="undefined"&&(Element.prototype.scrollTo||(Element.prototype.scrollTo=function(e,t){typeof e=="object"?(this.scrollLeft=e.left,this.scrollTop=e.top):(this.scrollLeft=e,this.scrollTop=t)}),Element.prototype.replaceWith||(Element.prototype.replaceWith=function(...e){let t=this.parentNode;if(t){e.length===0&&t.removeChild(this);for(let r=e.length-1;r>=0;r--){let o=e[r];typeof o=="string"?o=document.createTextNode(o):o.parentNode&&o.parentNode.removeChild(o),r?t.insertBefore(this.previousSibling,o):t.replaceChild(o,this)}}}));function ts(){return location.protocol==="file:"?gt(`${new URL("search/search_index.js",Yr.base)}`).pipe(m(()=>__index),B(1)):De(new URL("search/search_index.json",Yr.base))}document.documentElement.classList.remove("no-js");document.documentElement.classList.add("js");var rt=No(),Rt=Jo(),wt=en(Rt),Br=Go(),_e=pn(),ur=At("(min-width: 960px)"),hi=At("(min-width: 1220px)"),bi=tn(),Yr=we(),vi=document.forms.namedItem("search")?ts():qe,Gr=new g;Wn({alert$:Gr});var Jr=new g;G("navigation.instant")&&zn({location$:Rt,viewport$:_e,progress$:Jr}).subscribe(rt);var di;((di=Yr.version)==null?void 0:di.provider)==="mike"&&Gn({document$:rt});T(Rt,wt).pipe(Ye(125)).subscribe(()=>{Be("drawer",!1),Be("search",!1)});Br.pipe(b(({mode:e})=>e==="global")).subscribe(e=>{switch(e.type){case"p":case",":let t=me("link[rel=prev]");typeof t!="undefined"&&st(t);break;case"n":case".":let r=me("link[rel=next]");typeof r!="undefined"&&st(r);break;case"Enter":let o=Re();o instanceof HTMLLabelElement&&o.click()}});li({document$:rt});mi({document$:rt,tablet$:ur});fi({document$:rt});ui({viewport$:_e,tablet$:ur});var 
tt=Rn(Te("header"),{viewport$:_e}),$t=rt.pipe(m(()=>Te("main")),v(e=>Fn(e,{viewport$:_e,header$:tt})),B(1)),rs=T(...ie("consent").map(e=>fn(e,{target$:wt})),...ie("dialog").map(e=>$n(e,{alert$:Gr})),...ie("header").map(e=>Pn(e,{viewport$:_e,header$:tt,main$:$t})),...ie("palette").map(e=>jn(e)),...ie("progress").map(e=>Un(e,{progress$:Jr})),...ie("search").map(e=>ti(e,{index$:vi,keyboard$:Br})),...ie("source").map(e=>ai(e))),os=H(()=>T(...ie("announce").map(e=>mn(e)),...ie("content").map(e=>Hn(e,{viewport$:_e,target$:wt,print$:bi})),...ie("content").map(e=>G("search.highlight")?ri(e,{index$:vi,location$:Rt}):L),...ie("header-title").map(e=>In(e,{viewport$:_e,header$:tt})),...ie("sidebar").map(e=>e.getAttribute("data-md-type")==="navigation"?Ur(hi,()=>Qr(e,{viewport$:_e,header$:tt,main$:$t})):Ur(ur,()=>Qr(e,{viewport$:_e,header$:tt,main$:$t}))),...ie("tabs").map(e=>si(e,{viewport$:_e,header$:tt})),...ie("toc").map(e=>ci(e,{viewport$:_e,header$:tt,main$:$t,target$:wt})),...ie("top").map(e=>pi(e,{viewport$:_e,header$:tt,main$:$t,target$:wt})))),gi=rt.pipe(v(()=>os),$e(rs),B(1));gi.subscribe();window.document$=rt;window.location$=Rt;window.target$=wt;window.keyboard$=Br;window.viewport$=_e;window.tablet$=ur;window.screen$=hi;window.print$=bi;window.alert$=Gr;window.progress$=Jr;window.component$=gi;})(); +//# sourceMappingURL=bundle.1e8ae164.min.js.map + diff --git a/assets/javascripts/bundle.1e8ae164.min.js.map b/assets/javascripts/bundle.1e8ae164.min.js.map new file mode 100644 index 0000000..6c33b8e --- /dev/null +++ b/assets/javascripts/bundle.1e8ae164.min.js.map @@ -0,0 +1,7 @@ +{ + "version": 3, + "sources": ["node_modules/focus-visible/dist/focus-visible.js", "node_modules/clipboard/dist/clipboard.js", "node_modules/escape-html/index.js", "src/templates/assets/javascripts/bundle.ts", "node_modules/rxjs/node_modules/tslib/tslib.es6.js", "node_modules/rxjs/src/internal/util/isFunction.ts", "node_modules/rxjs/src/internal/util/createErrorClass.ts", "node_modules/rxjs/src/internal/util/UnsubscriptionError.ts", "node_modules/rxjs/src/internal/util/arrRemove.ts", "node_modules/rxjs/src/internal/Subscription.ts", "node_modules/rxjs/src/internal/config.ts", "node_modules/rxjs/src/internal/scheduler/timeoutProvider.ts", "node_modules/rxjs/src/internal/util/reportUnhandledError.ts", "node_modules/rxjs/src/internal/util/noop.ts", "node_modules/rxjs/src/internal/NotificationFactories.ts", "node_modules/rxjs/src/internal/util/errorContext.ts", "node_modules/rxjs/src/internal/Subscriber.ts", "node_modules/rxjs/src/internal/symbol/observable.ts", "node_modules/rxjs/src/internal/util/identity.ts", "node_modules/rxjs/src/internal/util/pipe.ts", "node_modules/rxjs/src/internal/Observable.ts", "node_modules/rxjs/src/internal/util/lift.ts", "node_modules/rxjs/src/internal/operators/OperatorSubscriber.ts", "node_modules/rxjs/src/internal/scheduler/animationFrameProvider.ts", "node_modules/rxjs/src/internal/util/ObjectUnsubscribedError.ts", "node_modules/rxjs/src/internal/Subject.ts", "node_modules/rxjs/src/internal/scheduler/dateTimestampProvider.ts", "node_modules/rxjs/src/internal/ReplaySubject.ts", "node_modules/rxjs/src/internal/scheduler/Action.ts", "node_modules/rxjs/src/internal/scheduler/intervalProvider.ts", "node_modules/rxjs/src/internal/scheduler/AsyncAction.ts", "node_modules/rxjs/src/internal/Scheduler.ts", "node_modules/rxjs/src/internal/scheduler/AsyncScheduler.ts", "node_modules/rxjs/src/internal/scheduler/async.ts", "node_modules/rxjs/src/internal/scheduler/AnimationFrameAction.ts", 
"node_modules/rxjs/src/internal/scheduler/AnimationFrameScheduler.ts", "node_modules/rxjs/src/internal/scheduler/animationFrame.ts", "node_modules/rxjs/src/internal/observable/empty.ts", "node_modules/rxjs/src/internal/util/isScheduler.ts", "node_modules/rxjs/src/internal/util/args.ts", "node_modules/rxjs/src/internal/util/isArrayLike.ts", "node_modules/rxjs/src/internal/util/isPromise.ts", "node_modules/rxjs/src/internal/util/isInteropObservable.ts", "node_modules/rxjs/src/internal/util/isAsyncIterable.ts", "node_modules/rxjs/src/internal/util/throwUnobservableError.ts", "node_modules/rxjs/src/internal/symbol/iterator.ts", "node_modules/rxjs/src/internal/util/isIterable.ts", "node_modules/rxjs/src/internal/util/isReadableStreamLike.ts", "node_modules/rxjs/src/internal/observable/innerFrom.ts", "node_modules/rxjs/src/internal/util/executeSchedule.ts", "node_modules/rxjs/src/internal/operators/observeOn.ts", "node_modules/rxjs/src/internal/operators/subscribeOn.ts", "node_modules/rxjs/src/internal/scheduled/scheduleObservable.ts", "node_modules/rxjs/src/internal/scheduled/schedulePromise.ts", "node_modules/rxjs/src/internal/scheduled/scheduleArray.ts", "node_modules/rxjs/src/internal/scheduled/scheduleIterable.ts", "node_modules/rxjs/src/internal/scheduled/scheduleAsyncIterable.ts", "node_modules/rxjs/src/internal/scheduled/scheduleReadableStreamLike.ts", "node_modules/rxjs/src/internal/scheduled/scheduled.ts", "node_modules/rxjs/src/internal/observable/from.ts", "node_modules/rxjs/src/internal/observable/of.ts", "node_modules/rxjs/src/internal/observable/throwError.ts", "node_modules/rxjs/src/internal/util/EmptyError.ts", "node_modules/rxjs/src/internal/util/isDate.ts", "node_modules/rxjs/src/internal/operators/map.ts", "node_modules/rxjs/src/internal/util/mapOneOrManyArgs.ts", "node_modules/rxjs/src/internal/util/argsArgArrayOrObject.ts", "node_modules/rxjs/src/internal/util/createObject.ts", "node_modules/rxjs/src/internal/observable/combineLatest.ts", "node_modules/rxjs/src/internal/operators/mergeInternals.ts", "node_modules/rxjs/src/internal/operators/mergeMap.ts", "node_modules/rxjs/src/internal/operators/mergeAll.ts", "node_modules/rxjs/src/internal/operators/concatAll.ts", "node_modules/rxjs/src/internal/observable/concat.ts", "node_modules/rxjs/src/internal/observable/defer.ts", "node_modules/rxjs/src/internal/observable/fromEvent.ts", "node_modules/rxjs/src/internal/observable/fromEventPattern.ts", "node_modules/rxjs/src/internal/observable/timer.ts", "node_modules/rxjs/src/internal/observable/merge.ts", "node_modules/rxjs/src/internal/observable/never.ts", "node_modules/rxjs/src/internal/util/argsOrArgArray.ts", "node_modules/rxjs/src/internal/operators/filter.ts", "node_modules/rxjs/src/internal/observable/zip.ts", "node_modules/rxjs/src/internal/operators/audit.ts", "node_modules/rxjs/src/internal/operators/auditTime.ts", "node_modules/rxjs/src/internal/operators/bufferCount.ts", "node_modules/rxjs/src/internal/operators/catchError.ts", "node_modules/rxjs/src/internal/operators/scanInternals.ts", "node_modules/rxjs/src/internal/operators/combineLatest.ts", "node_modules/rxjs/src/internal/operators/combineLatestWith.ts", "node_modules/rxjs/src/internal/operators/debounceTime.ts", "node_modules/rxjs/src/internal/operators/defaultIfEmpty.ts", "node_modules/rxjs/src/internal/operators/take.ts", "node_modules/rxjs/src/internal/operators/ignoreElements.ts", "node_modules/rxjs/src/internal/operators/mapTo.ts", "node_modules/rxjs/src/internal/operators/delayWhen.ts", 
"node_modules/rxjs/src/internal/operators/delay.ts", "node_modules/rxjs/src/internal/operators/distinctUntilChanged.ts", "node_modules/rxjs/src/internal/operators/distinctUntilKeyChanged.ts", "node_modules/rxjs/src/internal/operators/throwIfEmpty.ts", "node_modules/rxjs/src/internal/operators/endWith.ts", "node_modules/rxjs/src/internal/operators/finalize.ts", "node_modules/rxjs/src/internal/operators/first.ts", "node_modules/rxjs/src/internal/operators/takeLast.ts", "node_modules/rxjs/src/internal/operators/merge.ts", "node_modules/rxjs/src/internal/operators/mergeWith.ts", "node_modules/rxjs/src/internal/operators/repeat.ts", "node_modules/rxjs/src/internal/operators/scan.ts", "node_modules/rxjs/src/internal/operators/share.ts", "node_modules/rxjs/src/internal/operators/shareReplay.ts", "node_modules/rxjs/src/internal/operators/skip.ts", "node_modules/rxjs/src/internal/operators/skipUntil.ts", "node_modules/rxjs/src/internal/operators/startWith.ts", "node_modules/rxjs/src/internal/operators/switchMap.ts", "node_modules/rxjs/src/internal/operators/takeUntil.ts", "node_modules/rxjs/src/internal/operators/takeWhile.ts", "node_modules/rxjs/src/internal/operators/tap.ts", "node_modules/rxjs/src/internal/operators/throttle.ts", "node_modules/rxjs/src/internal/operators/throttleTime.ts", "node_modules/rxjs/src/internal/operators/withLatestFrom.ts", "node_modules/rxjs/src/internal/operators/zip.ts", "node_modules/rxjs/src/internal/operators/zipWith.ts", "src/templates/assets/javascripts/browser/document/index.ts", "src/templates/assets/javascripts/browser/element/_/index.ts", "src/templates/assets/javascripts/browser/element/focus/index.ts", "src/templates/assets/javascripts/browser/element/hover/index.ts", "src/templates/assets/javascripts/browser/element/offset/_/index.ts", "src/templates/assets/javascripts/browser/element/offset/content/index.ts", "src/templates/assets/javascripts/utilities/h/index.ts", "src/templates/assets/javascripts/utilities/round/index.ts", "src/templates/assets/javascripts/browser/script/index.ts", "src/templates/assets/javascripts/browser/element/size/_/index.ts", "src/templates/assets/javascripts/browser/element/size/content/index.ts", "src/templates/assets/javascripts/browser/element/visibility/index.ts", "src/templates/assets/javascripts/browser/toggle/index.ts", "src/templates/assets/javascripts/browser/keyboard/index.ts", "src/templates/assets/javascripts/browser/location/_/index.ts", "src/templates/assets/javascripts/browser/location/hash/index.ts", "src/templates/assets/javascripts/browser/media/index.ts", "src/templates/assets/javascripts/browser/request/index.ts", "src/templates/assets/javascripts/browser/viewport/offset/index.ts", "src/templates/assets/javascripts/browser/viewport/size/index.ts", "src/templates/assets/javascripts/browser/viewport/_/index.ts", "src/templates/assets/javascripts/browser/viewport/at/index.ts", "src/templates/assets/javascripts/browser/worker/index.ts", "src/templates/assets/javascripts/_/index.ts", "src/templates/assets/javascripts/components/_/index.ts", "src/templates/assets/javascripts/components/announce/index.ts", "src/templates/assets/javascripts/components/consent/index.ts", "src/templates/assets/javascripts/templates/tooltip/index.tsx", "src/templates/assets/javascripts/templates/annotation/index.tsx", "src/templates/assets/javascripts/templates/clipboard/index.tsx", "src/templates/assets/javascripts/templates/search/index.tsx", "src/templates/assets/javascripts/templates/source/index.tsx", 
"src/templates/assets/javascripts/templates/tabbed/index.tsx", "src/templates/assets/javascripts/templates/table/index.tsx", "src/templates/assets/javascripts/templates/version/index.tsx", "src/templates/assets/javascripts/components/tooltip/index.ts", "src/templates/assets/javascripts/components/content/annotation/_/index.ts", "src/templates/assets/javascripts/components/content/annotation/list/index.ts", "src/templates/assets/javascripts/components/content/annotation/block/index.ts", "src/templates/assets/javascripts/components/content/code/_/index.ts", "src/templates/assets/javascripts/components/content/details/index.ts", "src/templates/assets/javascripts/components/content/mermaid/index.css", "src/templates/assets/javascripts/components/content/mermaid/index.ts", "src/templates/assets/javascripts/components/content/table/index.ts", "src/templates/assets/javascripts/components/content/tabs/index.ts", "src/templates/assets/javascripts/components/content/_/index.ts", "src/templates/assets/javascripts/components/dialog/index.ts", "src/templates/assets/javascripts/components/header/_/index.ts", "src/templates/assets/javascripts/components/header/title/index.ts", "src/templates/assets/javascripts/components/main/index.ts", "src/templates/assets/javascripts/components/palette/index.ts", "src/templates/assets/javascripts/components/progress/index.ts", "src/templates/assets/javascripts/integrations/clipboard/index.ts", "src/templates/assets/javascripts/integrations/sitemap/index.ts", "src/templates/assets/javascripts/integrations/instant/index.ts", "src/templates/assets/javascripts/integrations/search/highlighter/index.ts", "src/templates/assets/javascripts/integrations/search/worker/message/index.ts", "src/templates/assets/javascripts/integrations/search/worker/_/index.ts", "src/templates/assets/javascripts/integrations/version/index.ts", "src/templates/assets/javascripts/components/search/query/index.ts", "src/templates/assets/javascripts/components/search/result/index.ts", "src/templates/assets/javascripts/components/search/share/index.ts", "src/templates/assets/javascripts/components/search/suggest/index.ts", "src/templates/assets/javascripts/components/search/_/index.ts", "src/templates/assets/javascripts/components/search/highlight/index.ts", "src/templates/assets/javascripts/components/sidebar/index.ts", "src/templates/assets/javascripts/components/source/facts/github/index.ts", "src/templates/assets/javascripts/components/source/facts/gitlab/index.ts", "src/templates/assets/javascripts/components/source/facts/_/index.ts", "src/templates/assets/javascripts/components/source/_/index.ts", "src/templates/assets/javascripts/components/tabs/index.ts", "src/templates/assets/javascripts/components/toc/index.ts", "src/templates/assets/javascripts/components/top/index.ts", "src/templates/assets/javascripts/patches/ellipsis/index.ts", "src/templates/assets/javascripts/patches/indeterminate/index.ts", "src/templates/assets/javascripts/patches/scrollfix/index.ts", "src/templates/assets/javascripts/patches/scrolllock/index.ts", "src/templates/assets/javascripts/polyfills/index.ts"], + "sourcesContent": ["(function (global, factory) {\n typeof exports === 'object' && typeof module !== 'undefined' ? factory() :\n typeof define === 'function' && define.amd ? 
define(factory) :\n (factory());\n}(this, (function () { 'use strict';\n\n /**\n * Applies the :focus-visible polyfill at the given scope.\n * A scope in this case is either the top-level Document or a Shadow Root.\n *\n * @param {(Document|ShadowRoot)} scope\n * @see https://github.com/WICG/focus-visible\n */\n function applyFocusVisiblePolyfill(scope) {\n var hadKeyboardEvent = true;\n var hadFocusVisibleRecently = false;\n var hadFocusVisibleRecentlyTimeout = null;\n\n var inputTypesAllowlist = {\n text: true,\n search: true,\n url: true,\n tel: true,\n email: true,\n password: true,\n number: true,\n date: true,\n month: true,\n week: true,\n time: true,\n datetime: true,\n 'datetime-local': true\n };\n\n /**\n * Helper function for legacy browsers and iframes which sometimes focus\n * elements like document, body, and non-interactive SVG.\n * @param {Element} el\n */\n function isValidFocusTarget(el) {\n if (\n el &&\n el !== document &&\n el.nodeName !== 'HTML' &&\n el.nodeName !== 'BODY' &&\n 'classList' in el &&\n 'contains' in el.classList\n ) {\n return true;\n }\n return false;\n }\n\n /**\n * Computes whether the given element should automatically trigger the\n * `focus-visible` class being added, i.e. whether it should always match\n * `:focus-visible` when focused.\n * @param {Element} el\n * @return {boolean}\n */\n function focusTriggersKeyboardModality(el) {\n var type = el.type;\n var tagName = el.tagName;\n\n if (tagName === 'INPUT' && inputTypesAllowlist[type] && !el.readOnly) {\n return true;\n }\n\n if (tagName === 'TEXTAREA' && !el.readOnly) {\n return true;\n }\n\n if (el.isContentEditable) {\n return true;\n }\n\n return false;\n }\n\n /**\n * Add the `focus-visible` class to the given element if it was not added by\n * the author.\n * @param {Element} el\n */\n function addFocusVisibleClass(el) {\n if (el.classList.contains('focus-visible')) {\n return;\n }\n el.classList.add('focus-visible');\n el.setAttribute('data-focus-visible-added', '');\n }\n\n /**\n * Remove the `focus-visible` class from the given element if it was not\n * originally added by the author.\n * @param {Element} el\n */\n function removeFocusVisibleClass(el) {\n if (!el.hasAttribute('data-focus-visible-added')) {\n return;\n }\n el.classList.remove('focus-visible');\n el.removeAttribute('data-focus-visible-added');\n }\n\n /**\n * If the most recent user interaction was via the keyboard;\n * and the key press did not include a meta, alt/option, or control key;\n * then the modality is keyboard. 
Otherwise, the modality is not keyboard.\n * Apply `focus-visible` to any current active element and keep track\n * of our keyboard modality state with `hadKeyboardEvent`.\n * @param {KeyboardEvent} e\n */\n function onKeyDown(e) {\n if (e.metaKey || e.altKey || e.ctrlKey) {\n return;\n }\n\n if (isValidFocusTarget(scope.activeElement)) {\n addFocusVisibleClass(scope.activeElement);\n }\n\n hadKeyboardEvent = true;\n }\n\n /**\n * If at any point a user clicks with a pointing device, ensure that we change\n * the modality away from keyboard.\n * This avoids the situation where a user presses a key on an already focused\n * element, and then clicks on a different element, focusing it with a\n * pointing device, while we still think we're in keyboard modality.\n * @param {Event} e\n */\n function onPointerDown(e) {\n hadKeyboardEvent = false;\n }\n\n /**\n * On `focus`, add the `focus-visible` class to the target if:\n * - the target received focus as a result of keyboard navigation, or\n * - the event target is an element that will likely require interaction\n * via the keyboard (e.g. a text box)\n * @param {Event} e\n */\n function onFocus(e) {\n // Prevent IE from focusing the document or HTML element.\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (hadKeyboardEvent || focusTriggersKeyboardModality(e.target)) {\n addFocusVisibleClass(e.target);\n }\n }\n\n /**\n * On `blur`, remove the `focus-visible` class from the target.\n * @param {Event} e\n */\n function onBlur(e) {\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (\n e.target.classList.contains('focus-visible') ||\n e.target.hasAttribute('data-focus-visible-added')\n ) {\n // To detect a tab/window switch, we look for a blur event followed\n // rapidly by a visibility change.\n // If we don't see a visibility change within 100ms, it's probably a\n // regular focus change.\n hadFocusVisibleRecently = true;\n window.clearTimeout(hadFocusVisibleRecentlyTimeout);\n hadFocusVisibleRecentlyTimeout = window.setTimeout(function() {\n hadFocusVisibleRecently = false;\n }, 100);\n removeFocusVisibleClass(e.target);\n }\n }\n\n /**\n * If the user changes tabs, keep track of whether or not the previously\n * focused element had .focus-visible.\n * @param {Event} e\n */\n function onVisibilityChange(e) {\n if (document.visibilityState === 'hidden') {\n // If the tab becomes active again, the browser will handle calling focus\n // on the element (Safari actually calls it twice).\n // If this tab change caused a blur on an element with focus-visible,\n // re-apply the class when the user switches back to the tab.\n if (hadFocusVisibleRecently) {\n hadKeyboardEvent = true;\n }\n addInitialPointerMoveListeners();\n }\n }\n\n /**\n * Add a group of listeners to detect usage of any pointing devices.\n * These listeners will be added when the polyfill first loads, and anytime\n * the window is blurred, so that they are active when the window regains\n * focus.\n */\n function addInitialPointerMoveListeners() {\n document.addEventListener('mousemove', onInitialPointerMove);\n document.addEventListener('mousedown', onInitialPointerMove);\n document.addEventListener('mouseup', onInitialPointerMove);\n document.addEventListener('pointermove', onInitialPointerMove);\n document.addEventListener('pointerdown', onInitialPointerMove);\n document.addEventListener('pointerup', onInitialPointerMove);\n document.addEventListener('touchmove', onInitialPointerMove);\n document.addEventListener('touchstart', onInitialPointerMove);\n 
document.addEventListener('touchend', onInitialPointerMove);\n }\n\n function removeInitialPointerMoveListeners() {\n document.removeEventListener('mousemove', onInitialPointerMove);\n document.removeEventListener('mousedown', onInitialPointerMove);\n document.removeEventListener('mouseup', onInitialPointerMove);\n document.removeEventListener('pointermove', onInitialPointerMove);\n document.removeEventListener('pointerdown', onInitialPointerMove);\n document.removeEventListener('pointerup', onInitialPointerMove);\n document.removeEventListener('touchmove', onInitialPointerMove);\n document.removeEventListener('touchstart', onInitialPointerMove);\n document.removeEventListener('touchend', onInitialPointerMove);\n }\n\n /**\n * When the polfyill first loads, assume the user is in keyboard modality.\n * If any event is received from a pointing device (e.g. mouse, pointer,\n * touch), turn off keyboard modality.\n * This accounts for situations where focus enters the page from the URL bar.\n * @param {Event} e\n */\n function onInitialPointerMove(e) {\n // Work around a Safari quirk that fires a mousemove on whenever the\n // window blurs, even if you're tabbing out of the page. \u00AF\\_(\u30C4)_/\u00AF\n if (e.target.nodeName && e.target.nodeName.toLowerCase() === 'html') {\n return;\n }\n\n hadKeyboardEvent = false;\n removeInitialPointerMoveListeners();\n }\n\n // For some kinds of state, we are interested in changes at the global scope\n // only. For example, global pointer input, global key presses and global\n // visibility change should affect the state at every scope:\n document.addEventListener('keydown', onKeyDown, true);\n document.addEventListener('mousedown', onPointerDown, true);\n document.addEventListener('pointerdown', onPointerDown, true);\n document.addEventListener('touchstart', onPointerDown, true);\n document.addEventListener('visibilitychange', onVisibilityChange, true);\n\n addInitialPointerMoveListeners();\n\n // For focus and blur, we specifically care about state changes in the local\n // scope. This is because focus / blur events that originate from within a\n // shadow root are not re-dispatched from the host element if it was already\n // the active element in its own scope:\n scope.addEventListener('focus', onFocus, true);\n scope.addEventListener('blur', onBlur, true);\n\n // We detect that a node is a ShadowRoot by ensuring that it is a\n // DocumentFragment and also has a host property. This check covers native\n // implementation and polyfill implementation transparently. If we only cared\n // about the native implementation, we could just check if the scope was\n // an instance of a ShadowRoot.\n if (scope.nodeType === Node.DOCUMENT_FRAGMENT_NODE && scope.host) {\n // Since a ShadowRoot is a special kind of DocumentFragment, it does not\n // have a root element to add a class to. So, we add this attribute to the\n // host element instead:\n scope.host.setAttribute('data-js-focus-visible', '');\n } else if (scope.nodeType === Node.DOCUMENT_NODE) {\n document.documentElement.classList.add('js-focus-visible');\n document.documentElement.setAttribute('data-js-focus-visible', '');\n }\n }\n\n // It is important to wrap all references to global window and document in\n // these checks to support server-side rendering use cases\n // @see https://github.com/WICG/focus-visible/issues/199\n if (typeof window !== 'undefined' && typeof document !== 'undefined') {\n // Make the polyfill helper globally available. 
This can be used as a signal\n // to interested libraries that wish to coordinate with the polyfill for e.g.,\n // applying the polyfill to a shadow root:\n window.applyFocusVisiblePolyfill = applyFocusVisiblePolyfill;\n\n // Notify interested libraries of the polyfill's presence, in case the\n // polyfill was loaded lazily:\n var event;\n\n try {\n event = new CustomEvent('focus-visible-polyfill-ready');\n } catch (error) {\n // IE11 does not support using CustomEvent as a constructor directly:\n event = document.createEvent('CustomEvent');\n event.initCustomEvent('focus-visible-polyfill-ready', false, false, {});\n }\n\n window.dispatchEvent(event);\n }\n\n if (typeof document !== 'undefined') {\n // Apply the polyfill to the global document, so that no JavaScript\n // coordination is required to use the polyfill in the top-level document:\n applyFocusVisiblePolyfill(document);\n }\n\n})));\n", "/*!\n * clipboard.js v2.0.11\n * https://clipboardjs.com/\n *\n * Licensed MIT \u00A9 Zeno Rocha\n */\n(function webpackUniversalModuleDefinition(root, factory) {\n\tif(typeof exports === 'object' && typeof module === 'object')\n\t\tmodule.exports = factory();\n\telse if(typeof define === 'function' && define.amd)\n\t\tdefine([], factory);\n\telse if(typeof exports === 'object')\n\t\texports[\"ClipboardJS\"] = factory();\n\telse\n\t\troot[\"ClipboardJS\"] = factory();\n})(this, function() {\nreturn /******/ (function() { // webpackBootstrap\n/******/ \tvar __webpack_modules__ = ({\n\n/***/ 686:\n/***/ (function(__unused_webpack_module, __webpack_exports__, __webpack_require__) {\n\n\"use strict\";\n\n// EXPORTS\n__webpack_require__.d(__webpack_exports__, {\n \"default\": function() { return /* binding */ clipboard; }\n});\n\n// EXTERNAL MODULE: ./node_modules/tiny-emitter/index.js\nvar tiny_emitter = __webpack_require__(279);\nvar tiny_emitter_default = /*#__PURE__*/__webpack_require__.n(tiny_emitter);\n// EXTERNAL MODULE: ./node_modules/good-listener/src/listen.js\nvar listen = __webpack_require__(370);\nvar listen_default = /*#__PURE__*/__webpack_require__.n(listen);\n// EXTERNAL MODULE: ./node_modules/select/src/select.js\nvar src_select = __webpack_require__(817);\nvar select_default = /*#__PURE__*/__webpack_require__.n(src_select);\n;// CONCATENATED MODULE: ./src/common/command.js\n/**\n * Executes a given operation type.\n * @param {String} type\n * @return {Boolean}\n */\nfunction command(type) {\n try {\n return document.execCommand(type);\n } catch (err) {\n return false;\n }\n}\n;// CONCATENATED MODULE: ./src/actions/cut.js\n\n\n/**\n * Cut action wrapper.\n * @param {String|HTMLElement} target\n * @return {String}\n */\n\nvar ClipboardActionCut = function ClipboardActionCut(target) {\n var selectedText = select_default()(target);\n command('cut');\n return selectedText;\n};\n\n/* harmony default export */ var actions_cut = (ClipboardActionCut);\n;// CONCATENATED MODULE: ./src/common/create-fake-element.js\n/**\n * Creates a fake textarea element with a value.\n * @param {String} value\n * @return {HTMLElement}\n */\nfunction createFakeElement(value) {\n var isRTL = document.documentElement.getAttribute('dir') === 'rtl';\n var fakeElement = document.createElement('textarea'); // Prevent zooming on iOS\n\n fakeElement.style.fontSize = '12pt'; // Reset box model\n\n fakeElement.style.border = '0';\n fakeElement.style.padding = '0';\n fakeElement.style.margin = '0'; // Move element out of screen horizontally\n\n fakeElement.style.position = 'absolute';\n fakeElement.style[isRTL ? 
'right' : 'left'] = '-9999px'; // Move element to the same position vertically\n\n var yPosition = window.pageYOffset || document.documentElement.scrollTop;\n fakeElement.style.top = \"\".concat(yPosition, \"px\");\n fakeElement.setAttribute('readonly', '');\n fakeElement.value = value;\n return fakeElement;\n}\n;// CONCATENATED MODULE: ./src/actions/copy.js\n\n\n\n/**\n * Create fake copy action wrapper using a fake element.\n * @param {String} target\n * @param {Object} options\n * @return {String}\n */\n\nvar fakeCopyAction = function fakeCopyAction(value, options) {\n var fakeElement = createFakeElement(value);\n options.container.appendChild(fakeElement);\n var selectedText = select_default()(fakeElement);\n command('copy');\n fakeElement.remove();\n return selectedText;\n};\n/**\n * Copy action wrapper.\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @return {String}\n */\n\n\nvar ClipboardActionCopy = function ClipboardActionCopy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n var selectedText = '';\n\n if (typeof target === 'string') {\n selectedText = fakeCopyAction(target, options);\n } else if (target instanceof HTMLInputElement && !['text', 'search', 'url', 'tel', 'password'].includes(target === null || target === void 0 ? void 0 : target.type)) {\n // If input type doesn't support `setSelectionRange`. Simulate it. https://developer.mozilla.org/en-US/docs/Web/API/HTMLInputElement/setSelectionRange\n selectedText = fakeCopyAction(target.value, options);\n } else {\n selectedText = select_default()(target);\n command('copy');\n }\n\n return selectedText;\n};\n\n/* harmony default export */ var actions_copy = (ClipboardActionCopy);\n;// CONCATENATED MODULE: ./src/actions/default.js\nfunction _typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { _typeof = function _typeof(obj) { return typeof obj; }; } else { _typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? \"symbol\" : typeof obj; }; } return _typeof(obj); }\n\n\n\n/**\n * Inner function which performs selection from either `text` or `target`\n * properties and then executes copy or cut operations.\n * @param {Object} options\n */\n\nvar ClipboardActionDefault = function ClipboardActionDefault() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};\n // Defines base properties passed from constructor.\n var _options$action = options.action,\n action = _options$action === void 0 ? 'copy' : _options$action,\n container = options.container,\n target = options.target,\n text = options.text; // Sets the `action` to be performed which can be either 'copy' or 'cut'.\n\n if (action !== 'copy' && action !== 'cut') {\n throw new Error('Invalid \"action\" value, use either \"copy\" or \"cut\"');\n } // Sets the `target` property using an element that will be have its content copied.\n\n\n if (target !== undefined) {\n if (target && _typeof(target) === 'object' && target.nodeType === 1) {\n if (action === 'copy' && target.hasAttribute('disabled')) {\n throw new Error('Invalid \"target\" attribute. Please use \"readonly\" instead of \"disabled\" attribute');\n }\n\n if (action === 'cut' && (target.hasAttribute('readonly') || target.hasAttribute('disabled'))) {\n throw new Error('Invalid \"target\" attribute. 
You can\\'t cut text from elements with \"readonly\" or \"disabled\" attributes');\n }\n } else {\n throw new Error('Invalid \"target\" value, use a valid Element');\n }\n } // Define selection strategy based on `text` property.\n\n\n if (text) {\n return actions_copy(text, {\n container: container\n });\n } // Defines which selection strategy based on `target` property.\n\n\n if (target) {\n return action === 'cut' ? actions_cut(target) : actions_copy(target, {\n container: container\n });\n }\n};\n\n/* harmony default export */ var actions_default = (ClipboardActionDefault);\n;// CONCATENATED MODULE: ./src/clipboard.js\nfunction clipboard_typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { clipboard_typeof = function _typeof(obj) { return typeof obj; }; } else { clipboard_typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? \"symbol\" : typeof obj; }; } return clipboard_typeof(obj); }\n\nfunction _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError(\"Cannot call a class as a function\"); } }\n\nfunction _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if (\"value\" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } }\n\nfunction _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; }\n\nfunction _inherits(subClass, superClass) { if (typeof superClass !== \"function\" && superClass !== null) { throw new TypeError(\"Super expression must either be null or a function\"); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, writable: true, configurable: true } }); if (superClass) _setPrototypeOf(subClass, superClass); }\n\nfunction _setPrototypeOf(o, p) { _setPrototypeOf = Object.setPrototypeOf || function _setPrototypeOf(o, p) { o.__proto__ = p; return o; }; return _setPrototypeOf(o, p); }\n\nfunction _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = _getPrototypeOf(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = _getPrototypeOf(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return _possibleConstructorReturn(this, result); }; }\n\nfunction _possibleConstructorReturn(self, call) { if (call && (clipboard_typeof(call) === \"object\" || typeof call === \"function\")) { return call; } return _assertThisInitialized(self); }\n\nfunction _assertThisInitialized(self) { if (self === void 0) { throw new ReferenceError(\"this hasn't been initialised - super() hasn't been called\"); } return self; }\n\nfunction _isNativeReflectConstruct() { if (typeof Reflect === \"undefined\" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === \"function\") return true; try { Date.prototype.toString.call(Reflect.construct(Date, [], function () {})); return true; } catch (e) { return false; } }\n\nfunction _getPrototypeOf(o) { _getPrototypeOf = Object.setPrototypeOf ? 
Object.getPrototypeOf : function _getPrototypeOf(o) { return o.__proto__ || Object.getPrototypeOf(o); }; return _getPrototypeOf(o); }\n\n\n\n\n\n\n/**\n * Helper function to retrieve attribute value.\n * @param {String} suffix\n * @param {Element} element\n */\n\nfunction getAttributeValue(suffix, element) {\n var attribute = \"data-clipboard-\".concat(suffix);\n\n if (!element.hasAttribute(attribute)) {\n return;\n }\n\n return element.getAttribute(attribute);\n}\n/**\n * Base class which takes one or more elements, adds event listeners to them,\n * and instantiates a new `ClipboardAction` on each click.\n */\n\n\nvar Clipboard = /*#__PURE__*/function (_Emitter) {\n _inherits(Clipboard, _Emitter);\n\n var _super = _createSuper(Clipboard);\n\n /**\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n * @param {Object} options\n */\n function Clipboard(trigger, options) {\n var _this;\n\n _classCallCheck(this, Clipboard);\n\n _this = _super.call(this);\n\n _this.resolveOptions(options);\n\n _this.listenClick(trigger);\n\n return _this;\n }\n /**\n * Defines if attributes would be resolved using internal setter functions\n * or custom functions that were passed in the constructor.\n * @param {Object} options\n */\n\n\n _createClass(Clipboard, [{\n key: \"resolveOptions\",\n value: function resolveOptions() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};\n this.action = typeof options.action === 'function' ? options.action : this.defaultAction;\n this.target = typeof options.target === 'function' ? options.target : this.defaultTarget;\n this.text = typeof options.text === 'function' ? options.text : this.defaultText;\n this.container = clipboard_typeof(options.container) === 'object' ? options.container : document.body;\n }\n /**\n * Adds a click event listener to the passed trigger.\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n */\n\n }, {\n key: \"listenClick\",\n value: function listenClick(trigger) {\n var _this2 = this;\n\n this.listener = listen_default()(trigger, 'click', function (e) {\n return _this2.onClick(e);\n });\n }\n /**\n * Defines a new `ClipboardAction` on each click event.\n * @param {Event} e\n */\n\n }, {\n key: \"onClick\",\n value: function onClick(e) {\n var trigger = e.delegateTarget || e.currentTarget;\n var action = this.action(trigger) || 'copy';\n var text = actions_default({\n action: action,\n container: this.container,\n target: this.target(trigger),\n text: this.text(trigger)\n }); // Fires an event based on the copy operation result.\n\n this.emit(text ? 
'success' : 'error', {\n action: action,\n text: text,\n trigger: trigger,\n clearSelection: function clearSelection() {\n if (trigger) {\n trigger.focus();\n }\n\n window.getSelection().removeAllRanges();\n }\n });\n }\n /**\n * Default `action` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultAction\",\n value: function defaultAction(trigger) {\n return getAttributeValue('action', trigger);\n }\n /**\n * Default `target` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultTarget\",\n value: function defaultTarget(trigger) {\n var selector = getAttributeValue('target', trigger);\n\n if (selector) {\n return document.querySelector(selector);\n }\n }\n /**\n * Allow fire programmatically a copy action\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @returns Text copied.\n */\n\n }, {\n key: \"defaultText\",\n\n /**\n * Default `text` lookup function.\n * @param {Element} trigger\n */\n value: function defaultText(trigger) {\n return getAttributeValue('text', trigger);\n }\n /**\n * Destroy lifecycle.\n */\n\n }, {\n key: \"destroy\",\n value: function destroy() {\n this.listener.destroy();\n }\n }], [{\n key: \"copy\",\n value: function copy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n return actions_copy(target, options);\n }\n /**\n * Allow fire programmatically a cut action\n * @param {String|HTMLElement} target\n * @returns Text cutted.\n */\n\n }, {\n key: \"cut\",\n value: function cut(target) {\n return actions_cut(target);\n }\n /**\n * Returns the support of the given action, or all actions if no action is\n * given.\n * @param {String} [action]\n */\n\n }, {\n key: \"isSupported\",\n value: function isSupported() {\n var action = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ['copy', 'cut'];\n var actions = typeof action === 'string' ? 
[action] : action;\n var support = !!document.queryCommandSupported;\n actions.forEach(function (action) {\n support = support && !!document.queryCommandSupported(action);\n });\n return support;\n }\n }]);\n\n return Clipboard;\n}((tiny_emitter_default()));\n\n/* harmony default export */ var clipboard = (Clipboard);\n\n/***/ }),\n\n/***/ 828:\n/***/ (function(module) {\n\nvar DOCUMENT_NODE_TYPE = 9;\n\n/**\n * A polyfill for Element.matches()\n */\nif (typeof Element !== 'undefined' && !Element.prototype.matches) {\n var proto = Element.prototype;\n\n proto.matches = proto.matchesSelector ||\n proto.mozMatchesSelector ||\n proto.msMatchesSelector ||\n proto.oMatchesSelector ||\n proto.webkitMatchesSelector;\n}\n\n/**\n * Finds the closest parent that matches a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @return {Function}\n */\nfunction closest (element, selector) {\n while (element && element.nodeType !== DOCUMENT_NODE_TYPE) {\n if (typeof element.matches === 'function' &&\n element.matches(selector)) {\n return element;\n }\n element = element.parentNode;\n }\n}\n\nmodule.exports = closest;\n\n\n/***/ }),\n\n/***/ 438:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar closest = __webpack_require__(828);\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} useCapture\n * @return {Object}\n */\nfunction _delegate(element, selector, type, callback, useCapture) {\n var listenerFn = listener.apply(this, arguments);\n\n element.addEventListener(type, listenerFn, useCapture);\n\n return {\n destroy: function() {\n element.removeEventListener(type, listenerFn, useCapture);\n }\n }\n}\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element|String|Array} [elements]\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} useCapture\n * @return {Object}\n */\nfunction delegate(elements, selector, type, callback, useCapture) {\n // Handle the regular Element usage\n if (typeof elements.addEventListener === 'function') {\n return _delegate.apply(null, arguments);\n }\n\n // Handle Element-less usage, it defaults to global delegation\n if (typeof type === 'function') {\n // Use `document` as the first parameter, then apply arguments\n // This is a short way to .unshift `arguments` without running into deoptimizations\n return _delegate.bind(null, document).apply(null, arguments);\n }\n\n // Handle Selector-based usage\n if (typeof elements === 'string') {\n elements = document.querySelectorAll(elements);\n }\n\n // Handle Array-like based usage\n return Array.prototype.map.call(elements, function (element) {\n return _delegate(element, selector, type, callback, useCapture);\n });\n}\n\n/**\n * Finds closest match and invokes callback.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Function}\n */\nfunction listener(element, selector, type, callback) {\n return function(e) {\n e.delegateTarget = closest(e.target, selector);\n\n if (e.delegateTarget) {\n callback.call(element, e);\n }\n }\n}\n\nmodule.exports = delegate;\n\n\n/***/ }),\n\n/***/ 879:\n/***/ (function(__unused_webpack_module, exports) {\n\n/**\n * Check if argument is a HTML element.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.node = function(value) {\n return value !== undefined\n && 
value instanceof HTMLElement\n && value.nodeType === 1;\n};\n\n/**\n * Check if argument is a list of HTML elements.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.nodeList = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return value !== undefined\n && (type === '[object NodeList]' || type === '[object HTMLCollection]')\n && ('length' in value)\n && (value.length === 0 || exports.node(value[0]));\n};\n\n/**\n * Check if argument is a string.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.string = function(value) {\n return typeof value === 'string'\n || value instanceof String;\n};\n\n/**\n * Check if argument is a function.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.fn = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return type === '[object Function]';\n};\n\n\n/***/ }),\n\n/***/ 370:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar is = __webpack_require__(879);\nvar delegate = __webpack_require__(438);\n\n/**\n * Validates all params and calls the right\n * listener function based on its target type.\n *\n * @param {String|HTMLElement|HTMLCollection|NodeList} target\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listen(target, type, callback) {\n if (!target && !type && !callback) {\n throw new Error('Missing required arguments');\n }\n\n if (!is.string(type)) {\n throw new TypeError('Second argument must be a String');\n }\n\n if (!is.fn(callback)) {\n throw new TypeError('Third argument must be a Function');\n }\n\n if (is.node(target)) {\n return listenNode(target, type, callback);\n }\n else if (is.nodeList(target)) {\n return listenNodeList(target, type, callback);\n }\n else if (is.string(target)) {\n return listenSelector(target, type, callback);\n }\n else {\n throw new TypeError('First argument must be a String, HTMLElement, HTMLCollection, or NodeList');\n }\n}\n\n/**\n * Adds an event listener to a HTML element\n * and returns a remove listener function.\n *\n * @param {HTMLElement} node\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNode(node, type, callback) {\n node.addEventListener(type, callback);\n\n return {\n destroy: function() {\n node.removeEventListener(type, callback);\n }\n }\n}\n\n/**\n * Add an event listener to a list of HTML elements\n * and returns a remove listener function.\n *\n * @param {NodeList|HTMLCollection} nodeList\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNodeList(nodeList, type, callback) {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.addEventListener(type, callback);\n });\n\n return {\n destroy: function() {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.removeEventListener(type, callback);\n });\n }\n }\n}\n\n/**\n * Add an event listener to a selector\n * and returns a remove listener function.\n *\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenSelector(selector, type, callback) {\n return delegate(document.body, selector, type, callback);\n}\n\nmodule.exports = listen;\n\n\n/***/ }),\n\n/***/ 817:\n/***/ (function(module) {\n\nfunction select(element) {\n var selectedText;\n\n if (element.nodeName === 'SELECT') {\n element.focus();\n\n selectedText = element.value;\n }\n else if (element.nodeName === 'INPUT' || element.nodeName 
=== 'TEXTAREA') {\n var isReadOnly = element.hasAttribute('readonly');\n\n if (!isReadOnly) {\n element.setAttribute('readonly', '');\n }\n\n element.select();\n element.setSelectionRange(0, element.value.length);\n\n if (!isReadOnly) {\n element.removeAttribute('readonly');\n }\n\n selectedText = element.value;\n }\n else {\n if (element.hasAttribute('contenteditable')) {\n element.focus();\n }\n\n var selection = window.getSelection();\n var range = document.createRange();\n\n range.selectNodeContents(element);\n selection.removeAllRanges();\n selection.addRange(range);\n\n selectedText = selection.toString();\n }\n\n return selectedText;\n}\n\nmodule.exports = select;\n\n\n/***/ }),\n\n/***/ 279:\n/***/ (function(module) {\n\nfunction E () {\n // Keep this empty so it's easier to inherit from\n // (via https://github.com/lipsmack from https://github.com/scottcorgan/tiny-emitter/issues/3)\n}\n\nE.prototype = {\n on: function (name, callback, ctx) {\n var e = this.e || (this.e = {});\n\n (e[name] || (e[name] = [])).push({\n fn: callback,\n ctx: ctx\n });\n\n return this;\n },\n\n once: function (name, callback, ctx) {\n var self = this;\n function listener () {\n self.off(name, listener);\n callback.apply(ctx, arguments);\n };\n\n listener._ = callback\n return this.on(name, listener, ctx);\n },\n\n emit: function (name) {\n var data = [].slice.call(arguments, 1);\n var evtArr = ((this.e || (this.e = {}))[name] || []).slice();\n var i = 0;\n var len = evtArr.length;\n\n for (i; i < len; i++) {\n evtArr[i].fn.apply(evtArr[i].ctx, data);\n }\n\n return this;\n },\n\n off: function (name, callback) {\n var e = this.e || (this.e = {});\n var evts = e[name];\n var liveEvents = [];\n\n if (evts && callback) {\n for (var i = 0, len = evts.length; i < len; i++) {\n if (evts[i].fn !== callback && evts[i].fn._ !== callback)\n liveEvents.push(evts[i]);\n }\n }\n\n // Remove event from queue to prevent memory leak\n // Suggested by https://github.com/lazd\n // Ref: https://github.com/scottcorgan/tiny-emitter/commit/c6ebfaa9bc973b33d110a84a307742b7cf94c953#commitcomment-5024910\n\n (liveEvents.length)\n ? 
e[name] = liveEvents\n : delete e[name];\n\n return this;\n }\n};\n\nmodule.exports = E;\nmodule.exports.TinyEmitter = E;\n\n\n/***/ })\n\n/******/ \t});\n/************************************************************************/\n/******/ \t// The module cache\n/******/ \tvar __webpack_module_cache__ = {};\n/******/ \t\n/******/ \t// The require function\n/******/ \tfunction __webpack_require__(moduleId) {\n/******/ \t\t// Check if module is in cache\n/******/ \t\tif(__webpack_module_cache__[moduleId]) {\n/******/ \t\t\treturn __webpack_module_cache__[moduleId].exports;\n/******/ \t\t}\n/******/ \t\t// Create a new module (and put it into the cache)\n/******/ \t\tvar module = __webpack_module_cache__[moduleId] = {\n/******/ \t\t\t// no module.id needed\n/******/ \t\t\t// no module.loaded needed\n/******/ \t\t\texports: {}\n/******/ \t\t};\n/******/ \t\n/******/ \t\t// Execute the module function\n/******/ \t\t__webpack_modules__[moduleId](module, module.exports, __webpack_require__);\n/******/ \t\n/******/ \t\t// Return the exports of the module\n/******/ \t\treturn module.exports;\n/******/ \t}\n/******/ \t\n/************************************************************************/\n/******/ \t/* webpack/runtime/compat get default export */\n/******/ \t!function() {\n/******/ \t\t// getDefaultExport function for compatibility with non-harmony modules\n/******/ \t\t__webpack_require__.n = function(module) {\n/******/ \t\t\tvar getter = module && module.__esModule ?\n/******/ \t\t\t\tfunction() { return module['default']; } :\n/******/ \t\t\t\tfunction() { return module; };\n/******/ \t\t\t__webpack_require__.d(getter, { a: getter });\n/******/ \t\t\treturn getter;\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/define property getters */\n/******/ \t!function() {\n/******/ \t\t// define getter functions for harmony exports\n/******/ \t\t__webpack_require__.d = function(exports, definition) {\n/******/ \t\t\tfor(var key in definition) {\n/******/ \t\t\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n/******/ \t\t\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n/******/ \t\t\t\t}\n/******/ \t\t\t}\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/hasOwnProperty shorthand */\n/******/ \t!function() {\n/******/ \t\t__webpack_require__.o = function(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); }\n/******/ \t}();\n/******/ \t\n/************************************************************************/\n/******/ \t// module exports must be returned from runtime so entry inlining is disabled\n/******/ \t// startup\n/******/ \t// Load entry module and return exports\n/******/ \treturn __webpack_require__(686);\n/******/ })()\n.default;\n});", "/*!\n * escape-html\n * Copyright(c) 2012-2013 TJ Holowaychuk\n * Copyright(c) 2015 Andreas Lubbe\n * Copyright(c) 2015 Tiancheng \"Timothy\" Gu\n * MIT Licensed\n */\n\n'use strict';\n\n/**\n * Module variables.\n * @private\n */\n\nvar matchHtmlRegExp = /[\"'&<>]/;\n\n/**\n * Module exports.\n * @public\n */\n\nmodule.exports = escapeHtml;\n\n/**\n * Escape special characters in the given string of html.\n *\n * @param {string} string The string to escape for inserting into HTML\n * @return {string}\n * @public\n */\n\nfunction escapeHtml(string) {\n var str = '' + string;\n var match = matchHtmlRegExp.exec(str);\n\n if (!match) {\n return str;\n }\n\n var escape;\n var html = '';\n var index = 0;\n 
var lastIndex = 0;\n\n for (index = match.index; index < str.length; index++) {\n switch (str.charCodeAt(index)) {\n case 34: // \"\n escape = '"';\n break;\n case 38: // &\n escape = '&';\n break;\n case 39: // '\n escape = ''';\n break;\n case 60: // <\n escape = '<';\n break;\n case 62: // >\n escape = '>';\n break;\n default:\n continue;\n }\n\n if (lastIndex !== index) {\n html += str.substring(lastIndex, index);\n }\n\n lastIndex = index + 1;\n html += escape;\n }\n\n return lastIndex !== index\n ? html + str.substring(lastIndex, index)\n : html;\n}\n", "/*\n * Copyright (c) 2016-2024 Martin Donath \n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to\n * deal in the Software without restriction, including without limitation the\n * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or\n * sell copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS\n * IN THE SOFTWARE.\n */\n\nimport \"focus-visible\"\n\nimport {\n EMPTY,\n NEVER,\n Observable,\n Subject,\n defer,\n delay,\n filter,\n map,\n merge,\n mergeWith,\n shareReplay,\n switchMap\n} from \"rxjs\"\n\nimport { configuration, feature } from \"./_\"\nimport {\n at,\n getActiveElement,\n getOptionalElement,\n requestJSON,\n setLocation,\n setToggle,\n watchDocument,\n watchKeyboard,\n watchLocation,\n watchLocationTarget,\n watchMedia,\n watchPrint,\n watchScript,\n watchViewport\n} from \"./browser\"\nimport {\n getComponentElement,\n getComponentElements,\n mountAnnounce,\n mountBackToTop,\n mountConsent,\n mountContent,\n mountDialog,\n mountHeader,\n mountHeaderTitle,\n mountPalette,\n mountProgress,\n mountSearch,\n mountSearchHiglight,\n mountSidebar,\n mountSource,\n mountTableOfContents,\n mountTabs,\n watchHeader,\n watchMain\n} from \"./components\"\nimport {\n SearchIndex,\n setupClipboardJS,\n setupInstantNavigation,\n setupVersionSelector\n} from \"./integrations\"\nimport {\n patchEllipsis,\n patchIndeterminate,\n patchScrollfix,\n patchScrolllock\n} from \"./patches\"\nimport \"./polyfills\"\n\n/* ----------------------------------------------------------------------------\n * Functions - @todo refactor\n * ------------------------------------------------------------------------- */\n\n/**\n * Fetch search index\n *\n * @returns Search index observable\n */\nfunction fetchSearchIndex(): Observable {\n if (location.protocol === \"file:\") {\n return watchScript(\n `${new URL(\"search/search_index.js\", config.base)}`\n )\n .pipe(\n // @ts-ignore - @todo fix typings\n map(() => __index),\n shareReplay(1)\n )\n } else {\n return requestJSON(\n new URL(\"search/search_index.json\", config.base)\n )\n }\n}\n\n/* ----------------------------------------------------------------------------\n * Application\n * 
------------------------------------------------------------------------- */\n\n/* Yay, JavaScript is available */\ndocument.documentElement.classList.remove(\"no-js\")\ndocument.documentElement.classList.add(\"js\")\n\n/* Set up navigation observables and subjects */\nconst document$ = watchDocument()\nconst location$ = watchLocation()\nconst target$ = watchLocationTarget(location$)\nconst keyboard$ = watchKeyboard()\n\n/* Set up media observables */\nconst viewport$ = watchViewport()\nconst tablet$ = watchMedia(\"(min-width: 960px)\")\nconst screen$ = watchMedia(\"(min-width: 1220px)\")\nconst print$ = watchPrint()\n\n/* Retrieve search index, if search is enabled */\nconst config = configuration()\nconst index$ = document.forms.namedItem(\"search\")\n ? fetchSearchIndex()\n : NEVER\n\n/* Set up Clipboard.js integration */\nconst alert$ = new Subject()\nsetupClipboardJS({ alert$ })\n\n/* Set up progress indicator */\nconst progress$ = new Subject()\n\n/* Set up instant navigation, if enabled */\nif (feature(\"navigation.instant\"))\n setupInstantNavigation({ location$, viewport$, progress$ })\n .subscribe(document$)\n\n/* Set up version selector */\nif (config.version?.provider === \"mike\")\n setupVersionSelector({ document$ })\n\n/* Always close drawer and search on navigation */\nmerge(location$, target$)\n .pipe(\n delay(125)\n )\n .subscribe(() => {\n setToggle(\"drawer\", false)\n setToggle(\"search\", false)\n })\n\n/* Set up global keyboard handlers */\nkeyboard$\n .pipe(\n filter(({ mode }) => mode === \"global\")\n )\n .subscribe(key => {\n switch (key.type) {\n\n /* Go to previous page */\n case \"p\":\n case \",\":\n const prev = getOptionalElement(\"link[rel=prev]\")\n if (typeof prev !== \"undefined\")\n setLocation(prev)\n break\n\n /* Go to next page */\n case \"n\":\n case \".\":\n const next = getOptionalElement(\"link[rel=next]\")\n if (typeof next !== \"undefined\")\n setLocation(next)\n break\n\n /* Expand navigation, see https://bit.ly/3ZjG5io */\n case \"Enter\":\n const active = getActiveElement()\n if (active instanceof HTMLLabelElement)\n active.click()\n }\n })\n\n/* Set up patches */\npatchEllipsis({ document$ })\npatchIndeterminate({ document$, tablet$ })\npatchScrollfix({ document$ })\npatchScrolllock({ viewport$, tablet$ })\n\n/* Set up header and main area observable */\nconst header$ = watchHeader(getComponentElement(\"header\"), { viewport$ })\nconst main$ = document$\n .pipe(\n map(() => getComponentElement(\"main\")),\n switchMap(el => watchMain(el, { viewport$, header$ })),\n shareReplay(1)\n )\n\n/* Set up control component observables */\nconst control$ = merge(\n\n /* Consent */\n ...getComponentElements(\"consent\")\n .map(el => mountConsent(el, { target$ })),\n\n /* Dialog */\n ...getComponentElements(\"dialog\")\n .map(el => mountDialog(el, { alert$ })),\n\n /* Header */\n ...getComponentElements(\"header\")\n .map(el => mountHeader(el, { viewport$, header$, main$ })),\n\n /* Color palette */\n ...getComponentElements(\"palette\")\n .map(el => mountPalette(el)),\n\n /* Progress bar */\n ...getComponentElements(\"progress\")\n .map(el => mountProgress(el, { progress$ })),\n\n /* Search */\n ...getComponentElements(\"search\")\n .map(el => mountSearch(el, { index$, keyboard$ })),\n\n /* Repository information */\n ...getComponentElements(\"source\")\n .map(el => mountSource(el))\n)\n\n/* Set up content component observables */\nconst content$ = defer(() => merge(\n\n /* Announcement bar */\n ...getComponentElements(\"announce\")\n .map(el => 
mountAnnounce(el)),\n\n /* Content */\n ...getComponentElements(\"content\")\n .map(el => mountContent(el, { viewport$, target$, print$ })),\n\n /* Search highlighting */\n ...getComponentElements(\"content\")\n .map(el => feature(\"search.highlight\")\n ? mountSearchHiglight(el, { index$, location$ })\n : EMPTY\n ),\n\n /* Header title */\n ...getComponentElements(\"header-title\")\n .map(el => mountHeaderTitle(el, { viewport$, header$ })),\n\n /* Sidebar */\n ...getComponentElements(\"sidebar\")\n .map(el => el.getAttribute(\"data-md-type\") === \"navigation\"\n ? at(screen$, () => mountSidebar(el, { viewport$, header$, main$ }))\n : at(tablet$, () => mountSidebar(el, { viewport$, header$, main$ }))\n ),\n\n /* Navigation tabs */\n ...getComponentElements(\"tabs\")\n .map(el => mountTabs(el, { viewport$, header$ })),\n\n /* Table of contents */\n ...getComponentElements(\"toc\")\n .map(el => mountTableOfContents(el, {\n viewport$, header$, main$, target$\n })),\n\n /* Back-to-top button */\n ...getComponentElements(\"top\")\n .map(el => mountBackToTop(el, { viewport$, header$, main$, target$ }))\n))\n\n/* Set up component observables */\nconst component$ = document$\n .pipe(\n switchMap(() => content$),\n mergeWith(control$),\n shareReplay(1)\n )\n\n/* Subscribe to all components */\ncomponent$.subscribe()\n\n/* ----------------------------------------------------------------------------\n * Exports\n * ------------------------------------------------------------------------- */\n\nwindow.document$ = document$ /* Document observable */\nwindow.location$ = location$ /* Location subject */\nwindow.target$ = target$ /* Location target observable */\nwindow.keyboard$ = keyboard$ /* Keyboard observable */\nwindow.viewport$ = viewport$ /* Viewport observable */\nwindow.tablet$ = tablet$ /* Media tablet observable */\nwindow.screen$ = screen$ /* Media screen observable */\nwindow.print$ = print$ /* Media print observable */\nwindow.alert$ = alert$ /* Alert subject */\nwindow.progress$ = progress$ /* Progress indicator subject */\nwindow.component$ = component$ /* Component observable */\n", "/*! *****************************************************************************\r\nCopyright (c) Microsoft Corporation.\r\n\r\nPermission to use, copy, modify, and/or distribute this software for any\r\npurpose with or without fee is hereby granted.\r\n\r\nTHE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH\r\nREGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY\r\nAND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,\r\nINDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM\r\nLOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR\r\nOTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR\r\nPERFORMANCE OF THIS SOFTWARE.\r\n***************************************************************************** */\r\n/* global Reflect, Promise */\r\n\r\nvar extendStatics = function(d, b) {\r\n extendStatics = Object.setPrototypeOf ||\r\n ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||\r\n function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };\r\n return extendStatics(d, b);\r\n};\r\n\r\nexport function __extends(d, b) {\r\n if (typeof b !== \"function\" && b !== null)\r\n throw new TypeError(\"Class extends value \" + String(b) + \" is not a constructor or null\");\r\n extendStatics(d, b);\r\n function __() { this.constructor = d; }\r\n d.prototype = b === null ? Object.create(b) : (__.prototype = b.prototype, new __());\r\n}\r\n\r\nexport var __assign = function() {\r\n __assign = Object.assign || function __assign(t) {\r\n for (var s, i = 1, n = arguments.length; i < n; i++) {\r\n s = arguments[i];\r\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p)) t[p] = s[p];\r\n }\r\n return t;\r\n }\r\n return __assign.apply(this, arguments);\r\n}\r\n\r\nexport function __rest(s, e) {\r\n var t = {};\r\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p) && e.indexOf(p) < 0)\r\n t[p] = s[p];\r\n if (s != null && typeof Object.getOwnPropertySymbols === \"function\")\r\n for (var i = 0, p = Object.getOwnPropertySymbols(s); i < p.length; i++) {\r\n if (e.indexOf(p[i]) < 0 && Object.prototype.propertyIsEnumerable.call(s, p[i]))\r\n t[p[i]] = s[p[i]];\r\n }\r\n return t;\r\n}\r\n\r\nexport function __decorate(decorators, target, key, desc) {\r\n var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;\r\n if (typeof Reflect === \"object\" && typeof Reflect.decorate === \"function\") r = Reflect.decorate(decorators, target, key, desc);\r\n else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;\r\n return c > 3 && r && Object.defineProperty(target, key, r), r;\r\n}\r\n\r\nexport function __param(paramIndex, decorator) {\r\n return function (target, key) { decorator(target, key, paramIndex); }\r\n}\r\n\r\nexport function __metadata(metadataKey, metadataValue) {\r\n if (typeof Reflect === \"object\" && typeof Reflect.metadata === \"function\") return Reflect.metadata(metadataKey, metadataValue);\r\n}\r\n\r\nexport function __awaiter(thisArg, _arguments, P, generator) {\r\n function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }\r\n return new (P || (P = Promise))(function (resolve, reject) {\r\n function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }\r\n function rejected(value) { try { step(generator[\"throw\"](value)); } catch (e) { reject(e); } }\r\n function step(result) { result.done ? 
resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }\r\n step((generator = generator.apply(thisArg, _arguments || [])).next());\r\n });\r\n}\r\n\r\nexport function __generator(thisArg, body) {\r\n var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;\r\n return g = { next: verb(0), \"throw\": verb(1), \"return\": verb(2) }, typeof Symbol === \"function\" && (g[Symbol.iterator] = function() { return this; }), g;\r\n function verb(n) { return function (v) { return step([n, v]); }; }\r\n function step(op) {\r\n if (f) throw new TypeError(\"Generator is already executing.\");\r\n while (_) try {\r\n if (f = 1, y && (t = op[0] & 2 ? y[\"return\"] : op[0] ? y[\"throw\"] || ((t = y[\"return\"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;\r\n if (y = 0, t) op = [op[0] & 2, t.value];\r\n switch (op[0]) {\r\n case 0: case 1: t = op; break;\r\n case 4: _.label++; return { value: op[1], done: false };\r\n case 5: _.label++; y = op[1]; op = [0]; continue;\r\n case 7: op = _.ops.pop(); _.trys.pop(); continue;\r\n default:\r\n if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }\r\n if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }\r\n if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }\r\n if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }\r\n if (t[2]) _.ops.pop();\r\n _.trys.pop(); continue;\r\n }\r\n op = body.call(thisArg, _);\r\n } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }\r\n if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };\r\n }\r\n}\r\n\r\nexport var __createBinding = Object.create ? (function(o, m, k, k2) {\r\n if (k2 === undefined) k2 = k;\r\n Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });\r\n}) : (function(o, m, k, k2) {\r\n if (k2 === undefined) k2 = k;\r\n o[k2] = m[k];\r\n});\r\n\r\nexport function __exportStar(m, o) {\r\n for (var p in m) if (p !== \"default\" && !Object.prototype.hasOwnProperty.call(o, p)) __createBinding(o, m, p);\r\n}\r\n\r\nexport function __values(o) {\r\n var s = typeof Symbol === \"function\" && Symbol.iterator, m = s && o[s], i = 0;\r\n if (m) return m.call(o);\r\n if (o && typeof o.length === \"number\") return {\r\n next: function () {\r\n if (o && i >= o.length) o = void 0;\r\n return { value: o && o[i++], done: !o };\r\n }\r\n };\r\n throw new TypeError(s ? 
\"Object is not iterable.\" : \"Symbol.iterator is not defined.\");\r\n}\r\n\r\nexport function __read(o, n) {\r\n var m = typeof Symbol === \"function\" && o[Symbol.iterator];\r\n if (!m) return o;\r\n var i = m.call(o), r, ar = [], e;\r\n try {\r\n while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);\r\n }\r\n catch (error) { e = { error: error }; }\r\n finally {\r\n try {\r\n if (r && !r.done && (m = i[\"return\"])) m.call(i);\r\n }\r\n finally { if (e) throw e.error; }\r\n }\r\n return ar;\r\n}\r\n\r\n/** @deprecated */\r\nexport function __spread() {\r\n for (var ar = [], i = 0; i < arguments.length; i++)\r\n ar = ar.concat(__read(arguments[i]));\r\n return ar;\r\n}\r\n\r\n/** @deprecated */\r\nexport function __spreadArrays() {\r\n for (var s = 0, i = 0, il = arguments.length; i < il; i++) s += arguments[i].length;\r\n for (var r = Array(s), k = 0, i = 0; i < il; i++)\r\n for (var a = arguments[i], j = 0, jl = a.length; j < jl; j++, k++)\r\n r[k] = a[j];\r\n return r;\r\n}\r\n\r\nexport function __spreadArray(to, from, pack) {\r\n if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {\r\n if (ar || !(i in from)) {\r\n if (!ar) ar = Array.prototype.slice.call(from, 0, i);\r\n ar[i] = from[i];\r\n }\r\n }\r\n return to.concat(ar || Array.prototype.slice.call(from));\r\n}\r\n\r\nexport function __await(v) {\r\n return this instanceof __await ? (this.v = v, this) : new __await(v);\r\n}\r\n\r\nexport function __asyncGenerator(thisArg, _arguments, generator) {\r\n if (!Symbol.asyncIterator) throw new TypeError(\"Symbol.asyncIterator is not defined.\");\r\n var g = generator.apply(thisArg, _arguments || []), i, q = [];\r\n return i = {}, verb(\"next\"), verb(\"throw\"), verb(\"return\"), i[Symbol.asyncIterator] = function () { return this; }, i;\r\n function verb(n) { if (g[n]) i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; }\r\n function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }\r\n function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }\r\n function fulfill(value) { resume(\"next\", value); }\r\n function reject(value) { resume(\"throw\", value); }\r\n function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }\r\n}\r\n\r\nexport function __asyncDelegator(o) {\r\n var i, p;\r\n return i = {}, verb(\"next\"), verb(\"throw\", function (e) { throw e; }), verb(\"return\"), i[Symbol.iterator] = function () { return this; }, i;\r\n function verb(n, f) { i[n] = o[n] ? function (v) { return (p = !p) ? { value: __await(o[n](v)), done: n === \"return\" } : f ? f(v) : v; } : f; }\r\n}\r\n\r\nexport function __asyncValues(o) {\r\n if (!Symbol.asyncIterator) throw new TypeError(\"Symbol.asyncIterator is not defined.\");\r\n var m = o[Symbol.asyncIterator], i;\r\n return m ? m.call(o) : (o = typeof __values === \"function\" ? 
__values(o) : o[Symbol.iterator](), i = {}, verb(\"next\"), verb(\"throw\"), verb(\"return\"), i[Symbol.asyncIterator] = function () { return this; }, i);\r\n function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }\r\n function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }\r\n}\r\n\r\nexport function __makeTemplateObject(cooked, raw) {\r\n if (Object.defineProperty) { Object.defineProperty(cooked, \"raw\", { value: raw }); } else { cooked.raw = raw; }\r\n return cooked;\r\n};\r\n\r\nvar __setModuleDefault = Object.create ? (function(o, v) {\r\n Object.defineProperty(o, \"default\", { enumerable: true, value: v });\r\n}) : function(o, v) {\r\n o[\"default\"] = v;\r\n};\r\n\r\nexport function __importStar(mod) {\r\n if (mod && mod.__esModule) return mod;\r\n var result = {};\r\n if (mod != null) for (var k in mod) if (k !== \"default\" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);\r\n __setModuleDefault(result, mod);\r\n return result;\r\n}\r\n\r\nexport function __importDefault(mod) {\r\n return (mod && mod.__esModule) ? mod : { default: mod };\r\n}\r\n\r\nexport function __classPrivateFieldGet(receiver, state, kind, f) {\r\n if (kind === \"a\" && !f) throw new TypeError(\"Private accessor was defined without a getter\");\r\n if (typeof state === \"function\" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError(\"Cannot read private member from an object whose class did not declare it\");\r\n return kind === \"m\" ? f : kind === \"a\" ? f.call(receiver) : f ? f.value : state.get(receiver);\r\n}\r\n\r\nexport function __classPrivateFieldSet(receiver, state, value, kind, f) {\r\n if (kind === \"m\") throw new TypeError(\"Private method is not writable\");\r\n if (kind === \"a\" && !f) throw new TypeError(\"Private accessor was defined without a setter\");\r\n if (typeof state === \"function\" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError(\"Cannot write private member to an object whose class did not declare it\");\r\n return (kind === \"a\" ? f.call(receiver, value) : f ? f.value = value : state.set(receiver, value)), value;\r\n}\r\n", "/**\n * Returns true if the object is a function.\n * @param value The value to check\n */\nexport function isFunction(value: any): value is (...args: any[]) => any {\n return typeof value === 'function';\n}\n", "/**\n * Used to create Error subclasses until the community moves away from ES5.\n *\n * This is because compiling from TypeScript down to ES5 has issues with subclassing Errors\n * as well as other built-in types: https://github.com/Microsoft/TypeScript/issues/12123\n *\n * @param createImpl A factory function to create the actual constructor implementation. 
The returned\n * function should be a named function that calls `_super` internally.\n */\nexport function createErrorClass(createImpl: (_super: any) => any): T {\n const _super = (instance: any) => {\n Error.call(instance);\n instance.stack = new Error().stack;\n };\n\n const ctorFunc = createImpl(_super);\n ctorFunc.prototype = Object.create(Error.prototype);\n ctorFunc.prototype.constructor = ctorFunc;\n return ctorFunc;\n}\n", "import { createErrorClass } from './createErrorClass';\n\nexport interface UnsubscriptionError extends Error {\n readonly errors: any[];\n}\n\nexport interface UnsubscriptionErrorCtor {\n /**\n * @deprecated Internal implementation detail. Do not construct error instances.\n * Cannot be tagged as internal: https://github.com/ReactiveX/rxjs/issues/6269\n */\n new (errors: any[]): UnsubscriptionError;\n}\n\n/**\n * An error thrown when one or more errors have occurred during the\n * `unsubscribe` of a {@link Subscription}.\n */\nexport const UnsubscriptionError: UnsubscriptionErrorCtor = createErrorClass(\n (_super) =>\n function UnsubscriptionErrorImpl(this: any, errors: (Error | string)[]) {\n _super(this);\n this.message = errors\n ? `${errors.length} errors occurred during unsubscription:\n${errors.map((err, i) => `${i + 1}) ${err.toString()}`).join('\\n ')}`\n : '';\n this.name = 'UnsubscriptionError';\n this.errors = errors;\n }\n);\n", "/**\n * Removes an item from an array, mutating it.\n * @param arr The array to remove the item from\n * @param item The item to remove\n */\nexport function arrRemove(arr: T[] | undefined | null, item: T) {\n if (arr) {\n const index = arr.indexOf(item);\n 0 <= index && arr.splice(index, 1);\n }\n}\n", "import { isFunction } from './util/isFunction';\nimport { UnsubscriptionError } from './util/UnsubscriptionError';\nimport { SubscriptionLike, TeardownLogic, Unsubscribable } from './types';\nimport { arrRemove } from './util/arrRemove';\n\n/**\n * Represents a disposable resource, such as the execution of an Observable. A\n * Subscription has one important method, `unsubscribe`, that takes no argument\n * and just disposes the resource held by the subscription.\n *\n * Additionally, subscriptions may be grouped together through the `add()`\n * method, which will attach a child Subscription to the current Subscription.\n * When a Subscription is unsubscribed, all its children (and its grandchildren)\n * will be unsubscribed as well.\n *\n * @class Subscription\n */\nexport class Subscription implements SubscriptionLike {\n /** @nocollapse */\n public static EMPTY = (() => {\n const empty = new Subscription();\n empty.closed = true;\n return empty;\n })();\n\n /**\n * A flag to indicate whether this Subscription has already been unsubscribed.\n */\n public closed = false;\n\n private _parentage: Subscription[] | Subscription | null = null;\n\n /**\n * The list of registered finalizers to execute upon unsubscription. Adding and removing from this\n * list occurs in the {@link #add} and {@link #remove} methods.\n */\n private _finalizers: Exclude[] | null = null;\n\n /**\n * @param initialTeardown A function executed first as part of the finalization\n * process that is kicked off when {@link #unsubscribe} is called.\n */\n constructor(private initialTeardown?: () => void) {}\n\n /**\n * Disposes the resources held by the subscription. 
May, for instance, cancel\n * an ongoing Observable execution or cancel any other type of work that\n * started when the Subscription was created.\n * @return {void}\n */\n unsubscribe(): void {\n let errors: any[] | undefined;\n\n if (!this.closed) {\n this.closed = true;\n\n // Remove this from it's parents.\n const { _parentage } = this;\n if (_parentage) {\n this._parentage = null;\n if (Array.isArray(_parentage)) {\n for (const parent of _parentage) {\n parent.remove(this);\n }\n } else {\n _parentage.remove(this);\n }\n }\n\n const { initialTeardown: initialFinalizer } = this;\n if (isFunction(initialFinalizer)) {\n try {\n initialFinalizer();\n } catch (e) {\n errors = e instanceof UnsubscriptionError ? e.errors : [e];\n }\n }\n\n const { _finalizers } = this;\n if (_finalizers) {\n this._finalizers = null;\n for (const finalizer of _finalizers) {\n try {\n execFinalizer(finalizer);\n } catch (err) {\n errors = errors ?? [];\n if (err instanceof UnsubscriptionError) {\n errors = [...errors, ...err.errors];\n } else {\n errors.push(err);\n }\n }\n }\n }\n\n if (errors) {\n throw new UnsubscriptionError(errors);\n }\n }\n }\n\n /**\n * Adds a finalizer to this subscription, so that finalization will be unsubscribed/called\n * when this subscription is unsubscribed. If this subscription is already {@link #closed},\n * because it has already been unsubscribed, then whatever finalizer is passed to it\n * will automatically be executed (unless the finalizer itself is also a closed subscription).\n *\n * Closed Subscriptions cannot be added as finalizers to any subscription. Adding a closed\n * subscription to a any subscription will result in no operation. (A noop).\n *\n * Adding a subscription to itself, or adding `null` or `undefined` will not perform any\n * operation at all. (A noop).\n *\n * `Subscription` instances that are added to this instance will automatically remove themselves\n * if they are unsubscribed. Functions and {@link Unsubscribable} objects that you wish to remove\n * will need to be removed manually with {@link #remove}\n *\n * @param teardown The finalization logic to add to this subscription.\n */\n add(teardown: TeardownLogic): void {\n // Only add the finalizer if it's not undefined\n // and don't add a subscription to itself.\n if (teardown && teardown !== this) {\n if (this.closed) {\n // If this subscription is already closed,\n // execute whatever finalizer is handed to it automatically.\n execFinalizer(teardown);\n } else {\n if (teardown instanceof Subscription) {\n // We don't add closed subscriptions, and we don't add the same subscription\n // twice. Subscription unsubscribe is idempotent.\n if (teardown.closed || teardown._hasParent(this)) {\n return;\n }\n teardown._addParent(this);\n }\n (this._finalizers = this._finalizers ?? 
[]).push(teardown);\n }\n }\n }\n\n /**\n * Checks to see if a this subscription already has a particular parent.\n * This will signal that this subscription has already been added to the parent in question.\n * @param parent the parent to check for\n */\n private _hasParent(parent: Subscription) {\n const { _parentage } = this;\n return _parentage === parent || (Array.isArray(_parentage) && _parentage.includes(parent));\n }\n\n /**\n * Adds a parent to this subscription so it can be removed from the parent if it\n * unsubscribes on it's own.\n *\n * NOTE: THIS ASSUMES THAT {@link _hasParent} HAS ALREADY BEEN CHECKED.\n * @param parent The parent subscription to add\n */\n private _addParent(parent: Subscription) {\n const { _parentage } = this;\n this._parentage = Array.isArray(_parentage) ? (_parentage.push(parent), _parentage) : _parentage ? [_parentage, parent] : parent;\n }\n\n /**\n * Called on a child when it is removed via {@link #remove}.\n * @param parent The parent to remove\n */\n private _removeParent(parent: Subscription) {\n const { _parentage } = this;\n if (_parentage === parent) {\n this._parentage = null;\n } else if (Array.isArray(_parentage)) {\n arrRemove(_parentage, parent);\n }\n }\n\n /**\n * Removes a finalizer from this subscription that was previously added with the {@link #add} method.\n *\n * Note that `Subscription` instances, when unsubscribed, will automatically remove themselves\n * from every other `Subscription` they have been added to. This means that using the `remove` method\n * is not a common thing and should be used thoughtfully.\n *\n * If you add the same finalizer instance of a function or an unsubscribable object to a `Subscription` instance\n * more than once, you will need to call `remove` the same number of times to remove all instances.\n *\n * All finalizer instances are removed to free up memory upon unsubscription.\n *\n * @param teardown The finalizer to remove from this subscription\n */\n remove(teardown: Exclude): void {\n const { _finalizers } = this;\n _finalizers && arrRemove(_finalizers, teardown);\n\n if (teardown instanceof Subscription) {\n teardown._removeParent(this);\n }\n }\n}\n\nexport const EMPTY_SUBSCRIPTION = Subscription.EMPTY;\n\nexport function isSubscription(value: any): value is Subscription {\n return (\n value instanceof Subscription ||\n (value && 'closed' in value && isFunction(value.remove) && isFunction(value.add) && isFunction(value.unsubscribe))\n );\n}\n\nfunction execFinalizer(finalizer: Unsubscribable | (() => void)) {\n if (isFunction(finalizer)) {\n finalizer();\n } else {\n finalizer.unsubscribe();\n }\n}\n", "import { Subscriber } from './Subscriber';\nimport { ObservableNotification } from './types';\n\n/**\n * The {@link GlobalConfig} object for RxJS. It is used to configure things\n * like how to react on unhandled errors.\n */\nexport const config: GlobalConfig = {\n onUnhandledError: null,\n onStoppedNotification: null,\n Promise: undefined,\n useDeprecatedSynchronousErrorHandling: false,\n useDeprecatedNextContext: false,\n};\n\n/**\n * The global configuration object for RxJS, used to configure things\n * like how to react on unhandled errors. Accessible via {@link config}\n * object.\n */\nexport interface GlobalConfig {\n /**\n * A registration point for unhandled errors from RxJS. These are errors that\n * cannot were not handled by consuming code in the usual subscription path. 
For\n * example, if you have this configured, and you subscribe to an observable without\n * providing an error handler, errors from that subscription will end up here. This\n * will _always_ be called asynchronously on another job in the runtime. This is because\n * we do not want errors thrown in this user-configured handler to interfere with the\n * behavior of the library.\n */\n onUnhandledError: ((err: any) => void) | null;\n\n /**\n * A registration point for notifications that cannot be sent to subscribers because they\n * have completed, errored or have been explicitly unsubscribed. By default, next, complete\n * and error notifications sent to stopped subscribers are noops. However, sometimes callers\n * might want a different behavior. For example, with sources that attempt to report errors\n * to stopped subscribers, a caller can configure RxJS to throw an unhandled error instead.\n * This will _always_ be called asynchronously on another job in the runtime. This is because\n * we do not want errors thrown in this user-configured handler to interfere with the\n * behavior of the library.\n */\n onStoppedNotification: ((notification: ObservableNotification, subscriber: Subscriber) => void) | null;\n\n /**\n * The promise constructor used by default for {@link Observable#toPromise toPromise} and {@link Observable#forEach forEach}\n * methods.\n *\n * @deprecated As of version 8, RxJS will no longer support this sort of injection of a\n * Promise constructor. If you need a Promise implementation other than native promises,\n * please polyfill/patch Promise as you see appropriate. Will be removed in v8.\n */\n Promise?: PromiseConstructorLike;\n\n /**\n * If true, turns on synchronous error rethrowing, which is a deprecated behavior\n * in v6 and higher. This behavior enables bad patterns like wrapping a subscribe\n * call in a try/catch block. It also enables producer interference, a nasty bug\n * where a multicast can be broken for all observers by a downstream consumer with\n * an unhandled error. DO NOT USE THIS FLAG UNLESS IT'S NEEDED TO BUY TIME\n * FOR MIGRATION REASONS.\n *\n * @deprecated As of version 8, RxJS will no longer support synchronous throwing\n * of unhandled errors. All errors will be thrown on a separate call stack to prevent bad\n * behaviors described above. Will be removed in v8.\n */\n useDeprecatedSynchronousErrorHandling: boolean;\n\n /**\n * If true, enables an as-of-yet undocumented feature from v5: The ability to access\n * `unsubscribe()` via `this` context in `next` functions created in observers passed\n * to `subscribe`.\n *\n * This is being removed because the performance was severely problematic, and it could also cause\n * issues when types other than POJOs are passed to subscribe as subscribers, as they will likely have\n * their `this` context overwritten.\n *\n * @deprecated As of version 8, RxJS will no longer support altering the\n * context of next functions provided as part of an observer to Subscribe. Instead,\n * you will have access to a subscription or a signal or token that will allow you to do things like\n * unsubscribe and test closed status. 
Will be removed in v8.\n */\n useDeprecatedNextContext: boolean;\n}\n", "import type { TimerHandle } from './timerHandle';\ntype SetTimeoutFunction = (handler: () => void, timeout?: number, ...args: any[]) => TimerHandle;\ntype ClearTimeoutFunction = (handle: TimerHandle) => void;\n\ninterface TimeoutProvider {\n setTimeout: SetTimeoutFunction;\n clearTimeout: ClearTimeoutFunction;\n delegate:\n | {\n setTimeout: SetTimeoutFunction;\n clearTimeout: ClearTimeoutFunction;\n }\n | undefined;\n}\n\nexport const timeoutProvider: TimeoutProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n setTimeout(handler: () => void, timeout?: number, ...args) {\n const { delegate } = timeoutProvider;\n if (delegate?.setTimeout) {\n return delegate.setTimeout(handler, timeout, ...args);\n }\n return setTimeout(handler, timeout, ...args);\n },\n clearTimeout(handle) {\n const { delegate } = timeoutProvider;\n return (delegate?.clearTimeout || clearTimeout)(handle as any);\n },\n delegate: undefined,\n};\n", "import { config } from '../config';\nimport { timeoutProvider } from '../scheduler/timeoutProvider';\n\n/**\n * Handles an error on another job either with the user-configured {@link onUnhandledError},\n * or by throwing it on that new job so it can be picked up by `window.onerror`, `process.on('error')`, etc.\n *\n * This should be called whenever there is an error that is out-of-band with the subscription\n * or when an error hits a terminal boundary of the subscription and no error handler was provided.\n *\n * @param err the error to report\n */\nexport function reportUnhandledError(err: any) {\n timeoutProvider.setTimeout(() => {\n const { onUnhandledError } = config;\n if (onUnhandledError) {\n // Execute the user-configured error handler.\n onUnhandledError(err);\n } else {\n // Throw so it is picked up by the runtime's uncaught error mechanism.\n throw err;\n }\n });\n}\n", "/* tslint:disable:no-empty */\nexport function noop() { }\n", "import { CompleteNotification, NextNotification, ErrorNotification } from './types';\n\n/**\n * A completion object optimized for memory use and created to be the\n * same \"shape\" as other notifications in v8.\n * @internal\n */\nexport const COMPLETE_NOTIFICATION = (() => createNotification('C', undefined, undefined) as CompleteNotification)();\n\n/**\n * Internal use only. Creates an optimized error notification that is the same \"shape\"\n * as other notifications.\n * @internal\n */\nexport function errorNotification(error: any): ErrorNotification {\n return createNotification('E', undefined, error) as any;\n}\n\n/**\n * Internal use only. Creates an optimized next notification that is the same \"shape\"\n * as other notifications.\n * @internal\n */\nexport function nextNotification(value: T) {\n return createNotification('N', value, undefined) as NextNotification;\n}\n\n/**\n * Ensures that all notifications created internally have the same \"shape\" in v8.\n *\n * TODO: This is only exported to support a crazy legacy test in `groupBy`.\n * @internal\n */\nexport function createNotification(kind: 'N' | 'E' | 'C', value: any, error: any) {\n return {\n kind,\n value,\n error,\n };\n}\n", "import { config } from '../config';\n\nlet context: { errorThrown: boolean; error: any } | null = null;\n\n/**\n * Handles dealing with errors for super-gross mode. 
Creates a context, in which\n * any synchronously thrown errors will be passed to {@link captureError}. Which\n * will record the error such that it will be rethrown after the call back is complete.\n * TODO: Remove in v8\n * @param cb An immediately executed function.\n */\nexport function errorContext(cb: () => void) {\n if (config.useDeprecatedSynchronousErrorHandling) {\n const isRoot = !context;\n if (isRoot) {\n context = { errorThrown: false, error: null };\n }\n cb();\n if (isRoot) {\n const { errorThrown, error } = context!;\n context = null;\n if (errorThrown) {\n throw error;\n }\n }\n } else {\n // This is the general non-deprecated path for everyone that\n // isn't crazy enough to use super-gross mode (useDeprecatedSynchronousErrorHandling)\n cb();\n }\n}\n\n/**\n * Captures errors only in super-gross mode.\n * @param err the error to capture\n */\nexport function captureError(err: any) {\n if (config.useDeprecatedSynchronousErrorHandling && context) {\n context.errorThrown = true;\n context.error = err;\n }\n}\n", "import { isFunction } from './util/isFunction';\nimport { Observer, ObservableNotification } from './types';\nimport { isSubscription, Subscription } from './Subscription';\nimport { config } from './config';\nimport { reportUnhandledError } from './util/reportUnhandledError';\nimport { noop } from './util/noop';\nimport { nextNotification, errorNotification, COMPLETE_NOTIFICATION } from './NotificationFactories';\nimport { timeoutProvider } from './scheduler/timeoutProvider';\nimport { captureError } from './util/errorContext';\n\n/**\n * Implements the {@link Observer} interface and extends the\n * {@link Subscription} class. While the {@link Observer} is the public API for\n * consuming the values of an {@link Observable}, all Observers get converted to\n * a Subscriber, in order to provide Subscription-like capabilities such as\n * `unsubscribe`. Subscriber is a common type in RxJS, and crucial for\n * implementing operators, but it is rarely used as a public API.\n *\n * @class Subscriber\n */\nexport class Subscriber extends Subscription implements Observer {\n /**\n * A static factory for a Subscriber, given a (potentially partial) definition\n * of an Observer.\n * @param next The `next` callback of an Observer.\n * @param error The `error` callback of an\n * Observer.\n * @param complete The `complete` callback of an\n * Observer.\n * @return A Subscriber wrapping the (partially defined)\n * Observer represented by the given arguments.\n * @nocollapse\n * @deprecated Do not use. Will be removed in v8. There is no replacement for this\n * method, and there is no reason to be creating instances of `Subscriber` directly.\n * If you have a specific use case, please file an issue.\n */\n static create(next?: (x?: T) => void, error?: (e?: any) => void, complete?: () => void): Subscriber {\n return new SafeSubscriber(next, error, complete);\n }\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n protected isStopped: boolean = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n protected destination: Subscriber | Observer; // this `any` is the escape hatch to erase extra type param (e.g. R)\n\n /**\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n * There is no reason to directly create an instance of Subscriber. 
This type is exported for typings reasons.\n */\n constructor(destination?: Subscriber | Observer) {\n super();\n if (destination) {\n this.destination = destination;\n // Automatically chain subscriptions together here.\n // if destination is a Subscription, then it is a Subscriber.\n if (isSubscription(destination)) {\n destination.add(this);\n }\n } else {\n this.destination = EMPTY_OBSERVER;\n }\n }\n\n /**\n * The {@link Observer} callback to receive notifications of type `next` from\n * the Observable, with a value. The Observable may call this method 0 or more\n * times.\n * @param {T} [value] The `next` value.\n * @return {void}\n */\n next(value?: T): void {\n if (this.isStopped) {\n handleStoppedNotification(nextNotification(value), this);\n } else {\n this._next(value!);\n }\n }\n\n /**\n * The {@link Observer} callback to receive notifications of type `error` from\n * the Observable, with an attached `Error`. Notifies the Observer that\n * the Observable has experienced an error condition.\n * @param {any} [err] The `error` exception.\n * @return {void}\n */\n error(err?: any): void {\n if (this.isStopped) {\n handleStoppedNotification(errorNotification(err), this);\n } else {\n this.isStopped = true;\n this._error(err);\n }\n }\n\n /**\n * The {@link Observer} callback to receive a valueless notification of type\n * `complete` from the Observable. Notifies the Observer that the Observable\n * has finished sending push-based notifications.\n * @return {void}\n */\n complete(): void {\n if (this.isStopped) {\n handleStoppedNotification(COMPLETE_NOTIFICATION, this);\n } else {\n this.isStopped = true;\n this._complete();\n }\n }\n\n unsubscribe(): void {\n if (!this.closed) {\n this.isStopped = true;\n super.unsubscribe();\n this.destination = null!;\n }\n }\n\n protected _next(value: T): void {\n this.destination.next(value);\n }\n\n protected _error(err: any): void {\n try {\n this.destination.error(err);\n } finally {\n this.unsubscribe();\n }\n }\n\n protected _complete(): void {\n try {\n this.destination.complete();\n } finally {\n this.unsubscribe();\n }\n }\n}\n\n/**\n * This bind is captured here because we want to be able to have\n * compatibility with monoid libraries that tend to use a method named\n * `bind`. 
In particular, a library called Monio requires this.\n */\nconst _bind = Function.prototype.bind;\n\nfunction bind any>(fn: Fn, thisArg: any): Fn {\n return _bind.call(fn, thisArg);\n}\n\n/**\n * Internal optimization only, DO NOT EXPOSE.\n * @internal\n */\nclass ConsumerObserver implements Observer {\n constructor(private partialObserver: Partial>) {}\n\n next(value: T): void {\n const { partialObserver } = this;\n if (partialObserver.next) {\n try {\n partialObserver.next(value);\n } catch (error) {\n handleUnhandledError(error);\n }\n }\n }\n\n error(err: any): void {\n const { partialObserver } = this;\n if (partialObserver.error) {\n try {\n partialObserver.error(err);\n } catch (error) {\n handleUnhandledError(error);\n }\n } else {\n handleUnhandledError(err);\n }\n }\n\n complete(): void {\n const { partialObserver } = this;\n if (partialObserver.complete) {\n try {\n partialObserver.complete();\n } catch (error) {\n handleUnhandledError(error);\n }\n }\n }\n}\n\nexport class SafeSubscriber extends Subscriber {\n constructor(\n observerOrNext?: Partial> | ((value: T) => void) | null,\n error?: ((e?: any) => void) | null,\n complete?: (() => void) | null\n ) {\n super();\n\n let partialObserver: Partial>;\n if (isFunction(observerOrNext) || !observerOrNext) {\n // The first argument is a function, not an observer. The next\n // two arguments *could* be observers, or they could be empty.\n partialObserver = {\n next: (observerOrNext ?? undefined) as (((value: T) => void) | undefined),\n error: error ?? undefined,\n complete: complete ?? undefined,\n };\n } else {\n // The first argument is a partial observer.\n let context: any;\n if (this && config.useDeprecatedNextContext) {\n // This is a deprecated path that made `this.unsubscribe()` available in\n // next handler functions passed to subscribe. This only exists behind a flag\n // now, as it is *very* slow.\n context = Object.create(observerOrNext);\n context.unsubscribe = () => this.unsubscribe();\n partialObserver = {\n next: observerOrNext.next && bind(observerOrNext.next, context),\n error: observerOrNext.error && bind(observerOrNext.error, context),\n complete: observerOrNext.complete && bind(observerOrNext.complete, context),\n };\n } else {\n // The \"normal\" path. 
Just use the partial observer directly.\n partialObserver = observerOrNext;\n }\n }\n\n // Wrap the partial observer to ensure it's a full observer, and\n // make sure proper error handling is accounted for.\n this.destination = new ConsumerObserver(partialObserver);\n }\n}\n\nfunction handleUnhandledError(error: any) {\n if (config.useDeprecatedSynchronousErrorHandling) {\n captureError(error);\n } else {\n // Ideal path, we report this as an unhandled error,\n // which is thrown on a new call stack.\n reportUnhandledError(error);\n }\n}\n\n/**\n * An error handler used when no error handler was supplied\n * to the SafeSubscriber -- meaning no error handler was supplied\n * do the `subscribe` call on our observable.\n * @param err The error to handle\n */\nfunction defaultErrorHandler(err: any) {\n throw err;\n}\n\n/**\n * A handler for notifications that cannot be sent to a stopped subscriber.\n * @param notification The notification being sent\n * @param subscriber The stopped subscriber\n */\nfunction handleStoppedNotification(notification: ObservableNotification, subscriber: Subscriber) {\n const { onStoppedNotification } = config;\n onStoppedNotification && timeoutProvider.setTimeout(() => onStoppedNotification(notification, subscriber));\n}\n\n/**\n * The observer used as a stub for subscriptions where the user did not\n * pass any arguments to `subscribe`. Comes with the default error handling\n * behavior.\n */\nexport const EMPTY_OBSERVER: Readonly> & { closed: true } = {\n closed: true,\n next: noop,\n error: defaultErrorHandler,\n complete: noop,\n};\n", "/**\n * Symbol.observable or a string \"@@observable\". Used for interop\n *\n * @deprecated We will no longer be exporting this symbol in upcoming versions of RxJS.\n * Instead polyfill and use Symbol.observable directly *or* use https://www.npmjs.com/package/symbol-observable\n */\nexport const observable: string | symbol = (() => (typeof Symbol === 'function' && Symbol.observable) || '@@observable')();\n", "/**\n * This function takes one parameter and just returns it. Simply put,\n * this is like `(x: T): T => x`.\n *\n * ## Examples\n *\n * This is useful in some cases when using things like `mergeMap`\n *\n * ```ts\n * import { interval, take, map, range, mergeMap, identity } from 'rxjs';\n *\n * const source$ = interval(1000).pipe(take(5));\n *\n * const result$ = source$.pipe(\n * map(i => range(i)),\n * mergeMap(identity) // same as mergeMap(x => x)\n * );\n *\n * result$.subscribe({\n * next: console.log\n * });\n * ```\n *\n * Or when you want to selectively apply an operator\n *\n * ```ts\n * import { interval, take, identity } from 'rxjs';\n *\n * const shouldLimit = () => Math.random() < 0.5;\n *\n * const source$ = interval(1000);\n *\n * const result$ = source$.pipe(shouldLimit() ? 
take(5) : identity);\n *\n * result$.subscribe({\n * next: console.log\n * });\n * ```\n *\n * @param x Any value that is returned by this function\n * @returns The value passed as the first parameter to this function\n */\nexport function identity(x: T): T {\n return x;\n}\n", "import { identity } from './identity';\nimport { UnaryFunction } from '../types';\n\nexport function pipe(): typeof identity;\nexport function pipe(fn1: UnaryFunction): UnaryFunction;\nexport function pipe(fn1: UnaryFunction, fn2: UnaryFunction): UnaryFunction;\nexport function pipe(fn1: UnaryFunction, fn2: UnaryFunction, fn3: UnaryFunction): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction,\n fn9: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction,\n fn9: UnaryFunction,\n ...fns: UnaryFunction[]\n): UnaryFunction;\n\n/**\n * pipe() can be called on one or more functions, each of which can take one argument (\"UnaryFunction\")\n * and uses it to return a value.\n * It returns a function that takes one argument, passes it to the first UnaryFunction, and then\n * passes the result to the next one, passes that result to the next one, and so on. \n */\nexport function pipe(...fns: Array>): UnaryFunction {\n return pipeFromArray(fns);\n}\n\n/** @internal */\nexport function pipeFromArray(fns: Array>): UnaryFunction {\n if (fns.length === 0) {\n return identity as UnaryFunction;\n }\n\n if (fns.length === 1) {\n return fns[0];\n }\n\n return function piped(input: T): R {\n return fns.reduce((prev: any, fn: UnaryFunction) => fn(prev), input as any);\n };\n}\n", "import { Operator } from './Operator';\nimport { SafeSubscriber, Subscriber } from './Subscriber';\nimport { isSubscription, Subscription } from './Subscription';\nimport { TeardownLogic, OperatorFunction, Subscribable, Observer } from './types';\nimport { observable as Symbol_observable } from './symbol/observable';\nimport { pipeFromArray } from './util/pipe';\nimport { config } from './config';\nimport { isFunction } from './util/isFunction';\nimport { errorContext } from './util/errorContext';\n\n/**\n * A representation of any set of values over any amount of time. This is the most basic building block\n * of RxJS.\n *\n * @class Observable\n */\nexport class Observable implements Subscribable {\n /**\n * @deprecated Internal implementation detail, do not use directly. 
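Because `pipeFromArray` is a plain left-to-right reduce over unary functions, the exported standalone `pipe` composes ordinary functions and operator functions alike. A small sketch against the public RxJS v7 API:

```ts
import { of, pipe, filter, map } from 'rxjs';

// Plain unary functions compose left to right...
const inc = (x: number) => x + 1;
const double = (x: number) => x * 2;
console.log(pipe(inc, double)(5)); // 12

// ...and so do operator functions, yielding a reusable operator.
const evensDoubled = pipe(
  filter((n: number) => n % 2 === 0),
  map((n) => n * 2)
);
of(1, 2, 3, 4).pipe(evensDoubled).subscribe(console.log); // 4, 8
```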
Will be made internal in v8.\n */\n source: Observable<any> | undefined;\n\n /**\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n */\n operator: Operator<any, T> | undefined;\n\n /**\n * @constructor\n * @param {Function} subscribe the function that is called when the Observable is\n * initially subscribed to. This function is given a Subscriber, to which new values\n * can be `next`ed, or an `error` method can be called to raise an error, or\n * `complete` can be called to notify of a successful completion.\n */\n constructor(subscribe?: (this: Observable<T>, subscriber: Subscriber<T>) => TeardownLogic) {\n if (subscribe) {\n this._subscribe = subscribe;\n }\n }\n\n // HACK: Since TypeScript inherits static properties too, we have to\n // fight against TypeScript here so Subject can have a different static create signature\n /**\n * Creates a new Observable by calling the Observable constructor\n * @owner Observable\n * @method create\n * @param {Function} subscribe? the subscriber function to be passed to the Observable constructor\n * @return {Observable} a new observable\n * @nocollapse\n * @deprecated Use `new Observable()` instead. Will be removed in v8.\n */\n static create: (...args: any[]) => any = <T>(subscribe?: (subscriber: Subscriber<T>) => TeardownLogic) => {\n return new Observable<T>(subscribe);\n };\n\n /**\n * Creates a new Observable, with this Observable instance as the source, and the passed\n * operator defined as the new observable's operator.\n * @method lift\n * @param operator the operator defining the operation to take on the observable\n * @return a new observable with the Operator applied\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n * If you have implemented an operator using `lift`, it is recommended that you create an\n * operator by simply returning `new Observable()` directly. See \"Creating new operators from\n * scratch\" section here: https://rxjs.dev/guide/operators\n */\n lift<R>(operator?: Operator<T, R>): Observable<R> {\n const observable = new Observable<R>();\n observable.source = this;\n observable.operator = operator;\n return observable;\n }\n\n subscribe(observerOrNext?: Partial<Observer<T>> | ((value: T) => void)): Subscription;\n /** @deprecated Instead of passing separate callback arguments, use an observer argument. Signatures taking separate callback arguments will be removed in v8. Details: https://rxjs.dev/deprecations/subscribe-arguments */\n subscribe(next?: ((value: T) => void) | null, error?: ((error: any) => void) | null, complete?: (() => void) | null): Subscription;\n /**\n * Invokes an execution of an Observable and registers Observer handlers for notifications it will emit.\n *\n * Use it when you have all these Observables, but still nothing is happening.\n *\n * `subscribe` is not a regular operator, but a method that calls Observable's internal `subscribe` function. It\n * might be for example a function that you passed to Observable's constructor, but most of the time it is\n * a library implementation, which defines what will be emitted by an Observable, and when it will be emitted. This means\n * that calling `subscribe` is actually the moment when Observable starts its work, not when it is created, as is often\n * thought.\n *\n * Apart from starting the execution of an Observable, this method allows you to listen for values\n * that an Observable emits, as well as for when it completes or errors. 
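The `lift` deprecation note above recommends building custom sources and operators with `new Observable()` directly. A minimal sketch of a constructor-based observable with teardown, using only the public API:

```ts
import { Observable } from 'rxjs';

const ticks$ = new Observable<number>((subscriber) => {
  let i = 0;
  const id = setInterval(() => subscriber.next(i++), 1000);
  // Teardown: runs on unsubscribe, error, or complete.
  return () => clearInterval(id);
});

const sub = ticks$.subscribe((n) => console.log(n));
setTimeout(() => sub.unsubscribe(), 3500); // logs 0, 1, 2
```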
You can achieve this in either\n * of the following two ways.\n *\n * The first way is creating an object that implements {@link Observer} interface. It should have methods\n * defined by that interface, but note that it should be just a regular JavaScript object, which you can create\n * yourself in any way you want (ES6 class, classic function constructor, object literal etc.). In particular, do\n * not attempt to use any RxJS implementation details to create Observers - you don't need them. Remember also\n * that your object does not have to implement all methods. If you find yourself creating a method that doesn't\n * do anything, you can simply omit it. Note however, if the `error` method is not provided and an error happens,\n * it will be thrown asynchronously. Errors thrown asynchronously cannot be caught using `try`/`catch`. Instead,\n * use the {@link onUnhandledError} configuration option or use a runtime handler (like `window.onerror` or\n * `process.on('error')`) to be notified of unhandled errors. Because of this, it's recommended that you provide\n * an `error` method to avoid missing thrown errors.\n *\n * The second way is to give up on the Observer object altogether and simply provide callback functions in place of its methods.\n * This means you can provide three functions as arguments to `subscribe`, where the first function is the equivalent\n * of a `next` method, the second of an `error` method and the third of a `complete` method. Just as in the case of an Observer,\n * if you do not need to listen for something, you can omit a function by passing `undefined` or `null`,\n * since `subscribe` recognizes these functions by where they were placed in the function call. When it comes\n * to the `error` function, as with an Observer, if not provided, errors emitted by an Observable will be thrown asynchronously.\n *\n * You can, however, subscribe with no parameters at all. This may be the case where you're not interested in terminal events\n * and you also handle emissions internally by using operators (e.g. using `tap`).\n *\n * Whichever style of calling `subscribe` you use, in both cases it returns a Subscription object.\n * This object allows you to call `unsubscribe` on it, which in turn will stop the work that an Observable does and will clean\n * up all resources that an Observable used. Note that cancelling a subscription will not call the `complete` callback\n * provided to the `subscribe` function, which is reserved for a regular completion signal that comes from an Observable.\n *\n * Remember that callbacks provided to `subscribe` are not guaranteed to be called asynchronously.\n * It is an Observable itself that decides when these functions will be called. For example {@link of}\n * by default emits all its values synchronously. 
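Since errors with no `error` handler are thrown asynchronously and cannot be caught with `try`/`catch`, the `onUnhandledError` configuration option mentioned above gives a central hook. A sketch, assuming RxJS v7's exported `config` object:

```ts
import { config, throwError } from 'rxjs';

// Route otherwise-unhandled errors to one place instead of an async rethrow.
config.onUnhandledError = (err) => console.warn('unhandled:', err);

throwError(() => new Error('no error callback')).subscribe();
// -> logs "unhandled: Error: no error callback" (asynchronously)
```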
Always check documentation for how given Observable\n * will behave when subscribed and if its default behavior can be modified with a `scheduler`.\n *\n * #### Examples\n *\n * Subscribe with an {@link guide/observer Observer}\n *\n * ```ts\n * import { of } from 'rxjs';\n *\n * const sumObserver = {\n * sum: 0,\n * next(value) {\n * console.log('Adding: ' + value);\n * this.sum = this.sum + value;\n * },\n * error() {\n * // We actually could just remove this method,\n * // since we do not really care about errors right now.\n * },\n * complete() {\n * console.log('Sum equals: ' + this.sum);\n * }\n * };\n *\n * of(1, 2, 3) // Synchronously emits 1, 2, 3 and then completes.\n * .subscribe(sumObserver);\n *\n * // Logs:\n * // 'Adding: 1'\n * // 'Adding: 2'\n * // 'Adding: 3'\n * // 'Sum equals: 6'\n * ```\n *\n * Subscribe with functions ({@link deprecations/subscribe-arguments deprecated})\n *\n * ```ts\n * import { of } from 'rxjs'\n *\n * let sum = 0;\n *\n * of(1, 2, 3).subscribe(\n * value => {\n * console.log('Adding: ' + value);\n * sum = sum + value;\n * },\n * undefined,\n * () => console.log('Sum equals: ' + sum)\n * );\n *\n * // Logs:\n * // 'Adding: 1'\n * // 'Adding: 2'\n * // 'Adding: 3'\n * // 'Sum equals: 6'\n * ```\n *\n * Cancel a subscription\n *\n * ```ts\n * import { interval } from 'rxjs';\n *\n * const subscription = interval(1000).subscribe({\n * next(num) {\n * console.log(num)\n * },\n * complete() {\n * // Will not be called, even when cancelling subscription.\n * console.log('completed!');\n * }\n * });\n *\n * setTimeout(() => {\n * subscription.unsubscribe();\n * console.log('unsubscribed!');\n * }, 2500);\n *\n * // Logs:\n * // 0 after 1s\n * // 1 after 2s\n * // 'unsubscribed!' after 2.5s\n * ```\n *\n * @param {Observer|Function} observerOrNext (optional) Either an observer with methods to be called,\n * or the first of three possible handlers, which is the handler for each value emitted from the subscribed\n * Observable.\n * @param {Function} error (optional) A handler for a terminal event resulting from an error. If no error handler is provided,\n * the error will be thrown asynchronously as unhandled.\n * @param {Function} complete (optional) A handler for a terminal event resulting from successful completion.\n * @return {Subscription} a subscription reference to the registered handlers\n * @method subscribe\n */\n subscribe(\n observerOrNext?: Partial> | ((value: T) => void) | null,\n error?: ((error: any) => void) | null,\n complete?: (() => void) | null\n ): Subscription {\n const subscriber = isSubscriber(observerOrNext) ? observerOrNext : new SafeSubscriber(observerOrNext, error, complete);\n\n errorContext(() => {\n const { operator, source } = this;\n subscriber.add(\n operator\n ? // We're dealing with a subscription in the\n // operator chain to one of our lifted operators.\n operator.call(subscriber, source)\n : source\n ? // If `source` has a value, but `operator` does not, something that\n // had intimate knowledge of our API, like our `Subject`, must have\n // set it. 
We're going to just call `_subscribe` directly.\n this._subscribe(subscriber)\n : // In all other cases, we're likely wrapping a user-provided initializer\n // function, so we need to catch errors and handle them appropriately.\n this._trySubscribe(subscriber)\n );\n });\n\n return subscriber;\n }\n\n /** @internal */\n protected _trySubscribe(sink: Subscriber): TeardownLogic {\n try {\n return this._subscribe(sink);\n } catch (err) {\n // We don't need to return anything in this case,\n // because it's just going to try to `add()` to a subscription\n // above.\n sink.error(err);\n }\n }\n\n /**\n * Used as a NON-CANCELLABLE means of subscribing to an observable, for use with\n * APIs that expect promises, like `async/await`. You cannot unsubscribe from this.\n *\n * **WARNING**: Only use this with observables you *know* will complete. If the source\n * observable does not complete, you will end up with a promise that is hung up, and\n * potentially all of the state of an async function hanging out in memory. To avoid\n * this situation, look into adding something like {@link timeout}, {@link take},\n * {@link takeWhile}, or {@link takeUntil} amongst others.\n *\n * #### Example\n *\n * ```ts\n * import { interval, take } from 'rxjs';\n *\n * const source$ = interval(1000).pipe(take(4));\n *\n * async function getTotal() {\n * let total = 0;\n *\n * await source$.forEach(value => {\n * total += value;\n * console.log('observable -> ' + value);\n * });\n *\n * return total;\n * }\n *\n * getTotal().then(\n * total => console.log('Total: ' + total)\n * );\n *\n * // Expected:\n * // 'observable -> 0'\n * // 'observable -> 1'\n * // 'observable -> 2'\n * // 'observable -> 3'\n * // 'Total: 6'\n * ```\n *\n * @param next a handler for each value emitted by the observable\n * @return a promise that either resolves on observable completion or\n * rejects with the handled error\n */\n forEach(next: (value: T) => void): Promise;\n\n /**\n * @param next a handler for each value emitted by the observable\n * @param promiseCtor a constructor function used to instantiate the Promise\n * @return a promise that either resolves on observable completion or\n * rejects with the handled error\n * @deprecated Passing a Promise constructor will no longer be available\n * in upcoming versions of RxJS. This is because it adds weight to the library, for very\n * little benefit. If you need this functionality, it is recommended that you either\n * polyfill Promise, or you create an adapter to convert the returned native promise\n * to whatever promise implementation you wanted. 
Will be removed in v8.\n */\n forEach(next: (value: T) => void, promiseCtor: PromiseConstructorLike): Promise;\n\n forEach(next: (value: T) => void, promiseCtor?: PromiseConstructorLike): Promise {\n promiseCtor = getPromiseCtor(promiseCtor);\n\n return new promiseCtor((resolve, reject) => {\n const subscriber = new SafeSubscriber({\n next: (value) => {\n try {\n next(value);\n } catch (err) {\n reject(err);\n subscriber.unsubscribe();\n }\n },\n error: reject,\n complete: resolve,\n });\n this.subscribe(subscriber);\n }) as Promise;\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): TeardownLogic {\n return this.source?.subscribe(subscriber);\n }\n\n /**\n * An interop point defined by the es7-observable spec https://github.com/zenparsing/es-observable\n * @method Symbol.observable\n * @return {Observable} this instance of the observable\n */\n [Symbol_observable]() {\n return this;\n }\n\n /* tslint:disable:max-line-length */\n pipe(): Observable;\n pipe(op1: OperatorFunction): Observable;\n pipe(op1: OperatorFunction, op2: OperatorFunction): Observable;\n pipe(op1: OperatorFunction, op2: OperatorFunction, op3: OperatorFunction): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction,\n op9: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction,\n op9: OperatorFunction,\n ...operations: OperatorFunction[]\n ): Observable;\n /* tslint:enable:max-line-length */\n\n /**\n * Used to stitch together functional operators into a chain.\n * @method pipe\n * @return {Observable} the Observable result of all of the operators having\n * been called in the order they were passed in.\n *\n * ## Example\n *\n * ```ts\n * import { interval, filter, map, scan } from 'rxjs';\n *\n * interval(1000)\n * .pipe(\n * filter(x => x % 2 === 0),\n * map(x => x + x),\n * scan((acc, x) => acc + x)\n * )\n * .subscribe(x => console.log(x));\n * ```\n */\n pipe(...operations: OperatorFunction[]): Observable {\n return pipeFromArray(operations)(this);\n }\n\n /* tslint:disable:max-line-length */\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(): Promise;\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. 
Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(PromiseCtor: typeof Promise): Promise;\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(PromiseCtor: PromiseConstructorLike): Promise;\n /* tslint:enable:max-line-length */\n\n /**\n * Subscribe to this Observable and get a Promise resolving on\n * `complete` with the last emission (if any).\n *\n * **WARNING**: Only use this with observables you *know* will complete. If the source\n * observable does not complete, you will end up with a promise that is hung up, and\n * potentially all of the state of an async function hanging out in memory. To avoid\n * this situation, look into adding something like {@link timeout}, {@link take},\n * {@link takeWhile}, or {@link takeUntil} amongst others.\n *\n * @method toPromise\n * @param [promiseCtor] a constructor function used to instantiate\n * the Promise\n * @return A Promise that resolves with the last value emit, or\n * rejects on an error. If there were no emissions, Promise\n * resolves with undefined.\n * @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise\n */\n toPromise(promiseCtor?: PromiseConstructorLike): Promise {\n promiseCtor = getPromiseCtor(promiseCtor);\n\n return new promiseCtor((resolve, reject) => {\n let value: T | undefined;\n this.subscribe(\n (x: T) => (value = x),\n (err: any) => reject(err),\n () => resolve(value)\n );\n }) as Promise;\n }\n}\n\n/**\n * Decides between a passed promise constructor from consuming code,\n * A default configured promise constructor, and the native promise\n * constructor and returns it. If nothing can be found, it will throw\n * an error.\n * @param promiseCtor The optional promise constructor to passed by consuming code\n */\nfunction getPromiseCtor(promiseCtor: PromiseConstructorLike | undefined) {\n return promiseCtor ?? config.Promise ?? Promise;\n}\n\nfunction isObserver(value: any): value is Observer {\n return value && isFunction(value.next) && isFunction(value.error) && isFunction(value.complete);\n}\n\nfunction isSubscriber(value: any): value is Subscriber {\n return (value && value instanceof Subscriber) || (isObserver(value) && isSubscription(value));\n}\n", "import { Observable } from '../Observable';\nimport { Subscriber } from '../Subscriber';\nimport { OperatorFunction } from '../types';\nimport { isFunction } from './isFunction';\n\n/**\n * Used to determine if an object is an Observable with a lift function.\n */\nexport function hasLift(source: any): source is { lift: InstanceType['lift'] } {\n return isFunction(source?.lift);\n}\n\n/**\n * Creates an `OperatorFunction`. 
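For code migrating off the deprecated `toPromise()`, the replacements named above behave slightly differently: they reject with `EmptyError` if the source completes without emitting, rather than resolving with `undefined`. A short sketch:

```ts
import { of, firstValueFrom, lastValueFrom } from 'rxjs';

async function demo() {
  const source$ = of('a', 'b', 'c');
  console.log(await firstValueFrom(source$)); // 'a'
  console.log(await lastValueFrom(source$)); // 'c'
}
demo();
```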
Used to define operators throughout the library in a concise way.\n * @param init The logic to connect the liftedSource to the subscriber at the moment of subscription.\n */\nexport function operate(\n init: (liftedSource: Observable, subscriber: Subscriber) => (() => void) | void\n): OperatorFunction {\n return (source: Observable) => {\n if (hasLift(source)) {\n return source.lift(function (this: Subscriber, liftedSource: Observable) {\n try {\n return init(liftedSource, this);\n } catch (err) {\n this.error(err);\n }\n });\n }\n throw new TypeError('Unable to lift unknown Observable type');\n };\n}\n", "import { Subscriber } from '../Subscriber';\n\n/**\n * Creates an instance of an `OperatorSubscriber`.\n * @param destination The downstream subscriber.\n * @param onNext Handles next values, only called if this subscriber is not stopped or closed. Any\n * error that occurs in this function is caught and sent to the `error` method of this subscriber.\n * @param onError Handles errors from the subscription, any errors that occur in this handler are caught\n * and send to the `destination` error handler.\n * @param onComplete Handles completion notification from the subscription. Any errors that occur in\n * this handler are sent to the `destination` error handler.\n * @param onFinalize Additional teardown logic here. This will only be called on teardown if the\n * subscriber itself is not already closed. This is called after all other teardown logic is executed.\n */\nexport function createOperatorSubscriber(\n destination: Subscriber,\n onNext?: (value: T) => void,\n onComplete?: () => void,\n onError?: (err: any) => void,\n onFinalize?: () => void\n): Subscriber {\n return new OperatorSubscriber(destination, onNext, onComplete, onError, onFinalize);\n}\n\n/**\n * A generic helper for allowing operators to be created with a Subscriber and\n * use closures to capture necessary state from the operator function itself.\n */\nexport class OperatorSubscriber extends Subscriber {\n /**\n * Creates an instance of an `OperatorSubscriber`.\n * @param destination The downstream subscriber.\n * @param onNext Handles next values, only called if this subscriber is not stopped or closed. Any\n * error that occurs in this function is caught and sent to the `error` method of this subscriber.\n * @param onError Handles errors from the subscription, any errors that occur in this handler are caught\n * and send to the `destination` error handler.\n * @param onComplete Handles completion notification from the subscription. Any errors that occur in\n * this handler are sent to the `destination` error handler.\n * @param onFinalize Additional finalization logic here. This will only be called on finalization if the\n * subscriber itself is not already closed. This is called after all other finalization logic is executed.\n * @param shouldUnsubscribe An optional check to see if an unsubscribe call should truly unsubscribe.\n * NOTE: This currently **ONLY** exists to support the strange behavior of {@link groupBy}, where unsubscription\n * to the resulting observable does not actually disconnect from the source if there are active subscriptions\n * to any grouped observable. 
(DO NOT EXPOSE OR USE EXTERNALLY!!!)\n */\n constructor(\n destination: Subscriber,\n onNext?: (value: T) => void,\n onComplete?: () => void,\n onError?: (err: any) => void,\n private onFinalize?: () => void,\n private shouldUnsubscribe?: () => boolean\n ) {\n // It's important - for performance reasons - that all of this class's\n // members are initialized and that they are always initialized in the same\n // order. This will ensure that all OperatorSubscriber instances have the\n // same hidden class in V8. This, in turn, will help keep the number of\n // hidden classes involved in property accesses within the base class as\n // low as possible. If the number of hidden classes involved exceeds four,\n // the property accesses will become megamorphic and performance penalties\n // will be incurred - i.e. inline caches won't be used.\n //\n // The reasons for ensuring all instances have the same hidden class are\n // further discussed in this blog post from Benedikt Meurer:\n // https://benediktmeurer.de/2018/03/23/impact-of-polymorphism-on-component-based-frameworks-like-react/\n super(destination);\n this._next = onNext\n ? function (this: OperatorSubscriber, value: T) {\n try {\n onNext(value);\n } catch (err) {\n destination.error(err);\n }\n }\n : super._next;\n this._error = onError\n ? function (this: OperatorSubscriber, err: any) {\n try {\n onError(err);\n } catch (err) {\n // Send any errors that occur down stream.\n destination.error(err);\n } finally {\n // Ensure finalization.\n this.unsubscribe();\n }\n }\n : super._error;\n this._complete = onComplete\n ? function (this: OperatorSubscriber) {\n try {\n onComplete();\n } catch (err) {\n // Send any errors that occur down stream.\n destination.error(err);\n } finally {\n // Ensure finalization.\n this.unsubscribe();\n }\n }\n : super._complete;\n }\n\n unsubscribe() {\n if (!this.shouldUnsubscribe || this.shouldUnsubscribe()) {\n const { closed } = this;\n super.unsubscribe();\n // Execute additional teardown if we have any and we didn't already do so.\n !closed && this.onFinalize?.();\n }\n }\n}\n", "import { Subscription } from '../Subscription';\n\ninterface AnimationFrameProvider {\n schedule(callback: FrameRequestCallback): Subscription;\n requestAnimationFrame: typeof requestAnimationFrame;\n cancelAnimationFrame: typeof cancelAnimationFrame;\n delegate:\n | {\n requestAnimationFrame: typeof requestAnimationFrame;\n cancelAnimationFrame: typeof cancelAnimationFrame;\n }\n | undefined;\n}\n\nexport const animationFrameProvider: AnimationFrameProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n schedule(callback) {\n let request = requestAnimationFrame;\n let cancel: typeof cancelAnimationFrame | undefined = cancelAnimationFrame;\n const { delegate } = animationFrameProvider;\n if (delegate) {\n request = delegate.requestAnimationFrame;\n cancel = delegate.cancelAnimationFrame;\n }\n const handle = request((timestamp) => {\n // Clear the cancel function. 
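`operate` and `OperatorSubscriber` are internal; the supported way to write the same kind of operator from user code is the `new Observable()` style recommended earlier. A hedged sketch of a trivial custom operator (the name `double` is illustrative only, not an RxJS export):

```ts
import { Observable, OperatorFunction } from 'rxjs';

function double(): OperatorFunction<number, number> {
  return (source) =>
    new Observable<number>((subscriber) =>
      // Returning the inner subscription wires up teardown.
      source.subscribe({
        next: (v) => subscriber.next(v * 2),
        error: (err) => subscriber.error(err),
        complete: () => subscriber.complete(),
      })
    );
}

// Usage: of(1, 2, 3).pipe(double()).subscribe(console.log); // 2, 4, 6
```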
The request has been fulfilled, so\n // attempting to cancel the request upon unsubscription would be\n // pointless.\n cancel = undefined;\n callback(timestamp);\n });\n return new Subscription(() => cancel?.(handle));\n },\n requestAnimationFrame(...args) {\n const { delegate } = animationFrameProvider;\n return (delegate?.requestAnimationFrame || requestAnimationFrame)(...args);\n },\n cancelAnimationFrame(...args) {\n const { delegate } = animationFrameProvider;\n return (delegate?.cancelAnimationFrame || cancelAnimationFrame)(...args);\n },\n delegate: undefined,\n};\n", "import { createErrorClass } from './createErrorClass';\n\nexport interface ObjectUnsubscribedError extends Error {}\n\nexport interface ObjectUnsubscribedErrorCtor {\n /**\n * @deprecated Internal implementation detail. Do not construct error instances.\n * Cannot be tagged as internal: https://github.com/ReactiveX/rxjs/issues/6269\n */\n new (): ObjectUnsubscribedError;\n}\n\n/**\n * An error thrown when an action is invalid because the object has been\n * unsubscribed.\n *\n * @see {@link Subject}\n * @see {@link BehaviorSubject}\n *\n * @class ObjectUnsubscribedError\n */\nexport const ObjectUnsubscribedError: ObjectUnsubscribedErrorCtor = createErrorClass(\n (_super) =>\n function ObjectUnsubscribedErrorImpl(this: any) {\n _super(this);\n this.name = 'ObjectUnsubscribedError';\n this.message = 'object unsubscribed';\n }\n);\n", "import { Operator } from './Operator';\nimport { Observable } from './Observable';\nimport { Subscriber } from './Subscriber';\nimport { Subscription, EMPTY_SUBSCRIPTION } from './Subscription';\nimport { Observer, SubscriptionLike, TeardownLogic } from './types';\nimport { ObjectUnsubscribedError } from './util/ObjectUnsubscribedError';\nimport { arrRemove } from './util/arrRemove';\nimport { errorContext } from './util/errorContext';\n\n/**\n * A Subject is a special type of Observable that allows values to be\n * multicasted to many Observers. Subjects are like EventEmitters.\n *\n * Every Subject is an Observable and an Observer. You can subscribe to a\n * Subject, and you can call next to feed values as well as error and complete.\n */\nexport class Subject extends Observable implements SubscriptionLike {\n closed = false;\n\n private currentObservers: Observer[] | null = null;\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n observers: Observer[] = [];\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n isStopped = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n hasError = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n thrownError: any = null;\n\n /**\n * Creates a \"subject\" by basically gluing an observer to an observable.\n *\n * @nocollapse\n * @deprecated Recommended you do not use. Will be removed at some point in the future. Plans for replacement still under discussion.\n */\n static create: (...args: any[]) => any = (destination: Observer, source: Observable): AnonymousSubject => {\n return new AnonymousSubject(destination, source);\n };\n\n constructor() {\n // NOTE: This must be here to obscure Observable's constructor.\n super();\n }\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. 
*/\n lift(operator: Operator): Observable {\n const subject = new AnonymousSubject(this, this);\n subject.operator = operator as any;\n return subject as any;\n }\n\n /** @internal */\n protected _throwIfClosed() {\n if (this.closed) {\n throw new ObjectUnsubscribedError();\n }\n }\n\n next(value: T) {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n if (!this.currentObservers) {\n this.currentObservers = Array.from(this.observers);\n }\n for (const observer of this.currentObservers) {\n observer.next(value);\n }\n }\n });\n }\n\n error(err: any) {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n this.hasError = this.isStopped = true;\n this.thrownError = err;\n const { observers } = this;\n while (observers.length) {\n observers.shift()!.error(err);\n }\n }\n });\n }\n\n complete() {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n this.isStopped = true;\n const { observers } = this;\n while (observers.length) {\n observers.shift()!.complete();\n }\n }\n });\n }\n\n unsubscribe() {\n this.isStopped = this.closed = true;\n this.observers = this.currentObservers = null!;\n }\n\n get observed() {\n return this.observers?.length > 0;\n }\n\n /** @internal */\n protected _trySubscribe(subscriber: Subscriber): TeardownLogic {\n this._throwIfClosed();\n return super._trySubscribe(subscriber);\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n this._throwIfClosed();\n this._checkFinalizedStatuses(subscriber);\n return this._innerSubscribe(subscriber);\n }\n\n /** @internal */\n protected _innerSubscribe(subscriber: Subscriber) {\n const { hasError, isStopped, observers } = this;\n if (hasError || isStopped) {\n return EMPTY_SUBSCRIPTION;\n }\n this.currentObservers = null;\n observers.push(subscriber);\n return new Subscription(() => {\n this.currentObservers = null;\n arrRemove(observers, subscriber);\n });\n }\n\n /** @internal */\n protected _checkFinalizedStatuses(subscriber: Subscriber) {\n const { hasError, thrownError, isStopped } = this;\n if (hasError) {\n subscriber.error(thrownError);\n } else if (isStopped) {\n subscriber.complete();\n }\n }\n\n /**\n * Creates a new Observable with this Subject as the source. You can do this\n * to create custom Observer-side logic of the Subject and conceal it from\n * code that uses the Observable.\n * @return {Observable} Observable that the Subject casts to\n */\n asObservable(): Observable {\n const observable: any = new Observable();\n observable.source = this;\n return observable;\n }\n}\n\n/**\n * @class AnonymousSubject\n */\nexport class AnonymousSubject extends Subject {\n constructor(\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n public destination?: Observer,\n source?: Observable\n ) {\n super();\n this.source = source;\n }\n\n next(value: T) {\n this.destination?.next?.(value);\n }\n\n error(err: any) {\n this.destination?.error?.(err);\n }\n\n complete() {\n this.destination?.complete?.();\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n return this.source?.subscribe(subscriber) ?? 
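A short sketch of the multicast behaviour described above, including the `ObjectUnsubscribedError` thrown (via `_throwIfClosed`) when a closed Subject is pushed to:

```ts
import { Subject } from 'rxjs';

const subject = new Subject<number>();
subject.subscribe((v) => console.log('A got', v));
subject.subscribe((v) => console.log('B got', v));

subject.next(1); // both A and B receive 1

subject.unsubscribe();
try {
  subject.next(2); // closed: _throwIfClosed fires
} catch (err) {
  console.log((err as Error).name); // 'ObjectUnsubscribedError'
}
```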
EMPTY_SUBSCRIPTION;\n }\n}\n", "import { TimestampProvider } from '../types';\n\ninterface DateTimestampProvider extends TimestampProvider {\n delegate: TimestampProvider | undefined;\n}\n\nexport const dateTimestampProvider: DateTimestampProvider = {\n now() {\n // Use the variable rather than `this` so that the function can be called\n // without being bound to the provider.\n return (dateTimestampProvider.delegate || Date).now();\n },\n delegate: undefined,\n};\n", "import { Subject } from './Subject';\nimport { TimestampProvider } from './types';\nimport { Subscriber } from './Subscriber';\nimport { Subscription } from './Subscription';\nimport { dateTimestampProvider } from './scheduler/dateTimestampProvider';\n\n/**\n * A variant of {@link Subject} that \"replays\" old values to new subscribers by emitting them when they first subscribe.\n *\n * `ReplaySubject` has an internal buffer that will store a specified number of values that it has observed. Like `Subject`,\n * `ReplaySubject` \"observes\" values by having them passed to its `next` method. When it observes a value, it will store that\n * value for a time determined by the configuration of the `ReplaySubject`, as passed to its constructor.\n *\n * When a new subscriber subscribes to the `ReplaySubject` instance, it will synchronously emit all values in its buffer in\n * a First-In-First-Out (FIFO) manner. The `ReplaySubject` will also complete, if it has observed completion; and it will\n * error if it has observed an error.\n *\n * There are two main configuration items to be concerned with:\n *\n * 1. `bufferSize` - This will determine how many items are stored in the buffer, defaults to infinite.\n * 2. `windowTime` - The amount of time to hold a value in the buffer before removing it from the buffer.\n *\n * Both configurations may exist simultaneously. So if you would like to buffer a maximum of 3 values, as long as the values\n * are less than 2 seconds old, you could do so with a `new ReplaySubject(3, 2000)`.\n *\n * ### Differences with BehaviorSubject\n *\n * `BehaviorSubject` is similar to `new ReplaySubject(1)`, with a couple of exceptions:\n *\n * 1. `BehaviorSubject` comes \"primed\" with a single value upon construction.\n * 2. `ReplaySubject` will replay values, even after observing an error, where `BehaviorSubject` will not.\n *\n * @see {@link Subject}\n * @see {@link BehaviorSubject}\n * @see {@link shareReplay}\n */\nexport class ReplaySubject extends Subject {\n private _buffer: (T | number)[] = [];\n private _infiniteTimeWindow = true;\n\n /**\n * @param bufferSize The size of the buffer to replay on subscription\n * @param windowTime The amount of time the buffered items will stay buffered\n * @param timestampProvider An object with a `now()` method that provides the current timestamp. 
This is used to\n * calculate the amount of time something has been buffered.\n */\n constructor(\n private _bufferSize = Infinity,\n private _windowTime = Infinity,\n private _timestampProvider: TimestampProvider = dateTimestampProvider\n ) {\n super();\n this._infiniteTimeWindow = _windowTime === Infinity;\n this._bufferSize = Math.max(1, _bufferSize);\n this._windowTime = Math.max(1, _windowTime);\n }\n\n next(value: T): void {\n const { isStopped, _buffer, _infiniteTimeWindow, _timestampProvider, _windowTime } = this;\n if (!isStopped) {\n _buffer.push(value);\n !_infiniteTimeWindow && _buffer.push(_timestampProvider.now() + _windowTime);\n }\n this._trimBuffer();\n super.next(value);\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n this._throwIfClosed();\n this._trimBuffer();\n\n const subscription = this._innerSubscribe(subscriber);\n\n const { _infiniteTimeWindow, _buffer } = this;\n // We use a copy here, so reentrant code does not mutate our array while we're\n // emitting it to a new subscriber.\n const copy = _buffer.slice();\n for (let i = 0; i < copy.length && !subscriber.closed; i += _infiniteTimeWindow ? 1 : 2) {\n subscriber.next(copy[i] as T);\n }\n\n this._checkFinalizedStatuses(subscriber);\n\n return subscription;\n }\n\n private _trimBuffer() {\n const { _bufferSize, _timestampProvider, _buffer, _infiniteTimeWindow } = this;\n // If we don't have an infinite buffer size, and we're over the length,\n // use splice to truncate the old buffer values off. Note that we have to\n // double the size for instances where we're not using an infinite time window\n // because we're storing the values and the timestamps in the same array.\n const adjustedBufferSize = (_infiniteTimeWindow ? 1 : 2) * _bufferSize;\n _bufferSize < Infinity && adjustedBufferSize < _buffer.length && _buffer.splice(0, _buffer.length - adjustedBufferSize);\n\n // Now, if we're not in an infinite time window, remove all values where the time is\n // older than what is allowed.\n if (!_infiniteTimeWindow) {\n const now = _timestampProvider.now();\n let last = 0;\n // Search the array for the first timestamp that isn't expired and\n // truncate the buffer up to that point.\n for (let i = 1; i < _buffer.length && (_buffer[i] as number) <= now; i += 2) {\n last = i;\n }\n last && _buffer.splice(0, last + 1);\n }\n }\n}\n", "import { Scheduler } from '../Scheduler';\nimport { Subscription } from '../Subscription';\nimport { SchedulerAction } from '../types';\n\n/**\n * A unit of work to be executed in a `scheduler`. An action is typically\n * created from within a {@link SchedulerLike} and an RxJS user does not need to concern\n * themselves about creating and manipulating an Action.\n *\n * ```ts\n * class Action extends Subscription {\n * new (scheduler: Scheduler, work: (state?: T) => void);\n * schedule(state?: T, delay: number = 0): Subscription;\n * }\n * ```\n *\n * @class Action\n */\nexport class Action extends Subscription {\n constructor(scheduler: Scheduler, work: (this: SchedulerAction, state?: T) => void) {\n super();\n }\n /**\n * Schedules this action on its parent {@link SchedulerLike} for execution. May be passed\n * some context object, `state`. 
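A sketch of the two buffer limits working together, per the constructor above (`bufferSize` of 3 combined with a `windowTime` of 2000 ms):

```ts
import { ReplaySubject } from 'rxjs';

const replay$ = new ReplaySubject<number>(3, 2000);

replay$.next(1);
replay$.next(2);
replay$.next(3);
replay$.next(4); // 1 is pushed out of the 3-item buffer

// A late subscriber (within the 2 s window) replays the buffer FIFO:
replay$.subscribe((v) => console.log('replayed', v)); // 2, 3, 4
```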
May happen at some point in the future,\n * according to the `delay` parameter, if specified.\n * @param {T} [state] Some contextual data that the `work` function uses when\n * called by the Scheduler.\n * @param {number} [delay] Time to wait before executing the work, where the\n * time unit is implicit and defined by the Scheduler.\n * @return {void}\n */\n public schedule(state?: T, delay: number = 0): Subscription {\n return this;\n }\n}\n", "import type { TimerHandle } from './timerHandle';\ntype SetIntervalFunction = (handler: () => void, timeout?: number, ...args: any[]) => TimerHandle;\ntype ClearIntervalFunction = (handle: TimerHandle) => void;\n\ninterface IntervalProvider {\n setInterval: SetIntervalFunction;\n clearInterval: ClearIntervalFunction;\n delegate:\n | {\n setInterval: SetIntervalFunction;\n clearInterval: ClearIntervalFunction;\n }\n | undefined;\n}\n\nexport const intervalProvider: IntervalProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n setInterval(handler: () => void, timeout?: number, ...args) {\n const { delegate } = intervalProvider;\n if (delegate?.setInterval) {\n return delegate.setInterval(handler, timeout, ...args);\n }\n return setInterval(handler, timeout, ...args);\n },\n clearInterval(handle) {\n const { delegate } = intervalProvider;\n return (delegate?.clearInterval || clearInterval)(handle as any);\n },\n delegate: undefined,\n};\n", "import { Action } from './Action';\nimport { SchedulerAction } from '../types';\nimport { Subscription } from '../Subscription';\nimport { AsyncScheduler } from './AsyncScheduler';\nimport { intervalProvider } from './intervalProvider';\nimport { arrRemove } from '../util/arrRemove';\nimport { TimerHandle } from './timerHandle';\n\nexport class AsyncAction extends Action {\n public id: TimerHandle | undefined;\n public state?: T;\n // @ts-ignore: Property has no initializer and is not definitely assigned\n public delay: number;\n protected pending: boolean = false;\n\n constructor(protected scheduler: AsyncScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n public schedule(state?: T, delay: number = 0): Subscription {\n if (this.closed) {\n return this;\n }\n\n // Always replace the current state with the new state.\n this.state = state;\n\n const id = this.id;\n const scheduler = this.scheduler;\n\n //\n // Important implementation note:\n //\n // Actions only execute once by default, unless rescheduled from within the\n // scheduled callback. This allows us to implement single and repeat\n // actions via the same code path, without adding API surface area, as well\n // as mimic traditional recursion but across asynchronous boundaries.\n //\n // However, JS runtimes and timers distinguish between intervals achieved by\n // serial `setTimeout` calls vs. a single `setInterval` call. An interval of\n // serial `setTimeout` calls can be individually delayed, which delays\n // scheduling the next `setTimeout`, and so on. `setInterval` attempts to\n // guarantee the interval callback will be invoked more precisely to the\n // interval period, regardless of load.\n //\n // Therefore, we use `setInterval` to schedule single and repeat actions.\n // If the action reschedules itself with the same delay, the interval is not\n // canceled. 
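`intervalProvider` itself is internal, but the delegation pattern it implements is easy to restate standalone: test code swaps in fake timers via `delegate` without patching globals. The name `myIntervalProvider` below is hypothetical, not an RxJS export:

```ts
type Handle = ReturnType<typeof setInterval>;

const myIntervalProvider = {
  // Tests assign a fake implementation here; production leaves it undefined.
  delegate: undefined as
    | { setInterval: typeof setInterval; clearInterval: typeof clearInterval }
    | undefined,
  setInterval(handler: () => void, timeout?: number): Handle {
    const { delegate } = myIntervalProvider;
    return (delegate?.setInterval ?? setInterval)(handler, timeout);
  },
  clearInterval(handle: Handle): void {
    const { delegate } = myIntervalProvider;
    (delegate?.clearInterval ?? clearInterval)(handle);
  },
};
```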
If the action doesn't reschedule, or reschedules with a\n // different delay, the interval will be canceled after scheduled callback\n // execution.\n //\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, delay);\n }\n\n // Set the pending flag indicating that this action has been scheduled, or\n // has recursively rescheduled itself.\n this.pending = true;\n\n this.delay = delay;\n // If this action has already an async Id, don't request a new one.\n this.id = this.id ?? this.requestAsyncId(scheduler, this.id, delay);\n\n return this;\n }\n\n protected requestAsyncId(scheduler: AsyncScheduler, _id?: TimerHandle, delay: number = 0): TimerHandle {\n return intervalProvider.setInterval(scheduler.flush.bind(scheduler, this), delay);\n }\n\n protected recycleAsyncId(_scheduler: AsyncScheduler, id?: TimerHandle, delay: number | null = 0): TimerHandle | undefined {\n // If this action is rescheduled with the same delay time, don't clear the interval id.\n if (delay != null && this.delay === delay && this.pending === false) {\n return id;\n }\n // Otherwise, if the action's delay time is different from the current delay,\n // or the action has been rescheduled before it's executed, clear the interval id\n if (id != null) {\n intervalProvider.clearInterval(id);\n }\n\n return undefined;\n }\n\n /**\n * Immediately executes this action and the `work` it contains.\n * @return {any}\n */\n public execute(state: T, delay: number): any {\n if (this.closed) {\n return new Error('executing a cancelled action');\n }\n\n this.pending = false;\n const error = this._execute(state, delay);\n if (error) {\n return error;\n } else if (this.pending === false && this.id != null) {\n // Dequeue if the action didn't reschedule itself. Don't call\n // unsubscribe(), because the action could reschedule later.\n // For example:\n // ```\n // scheduler.schedule(function doWork(counter) {\n // /* ... I'm a busy worker bee ... */\n // var originalAction = this;\n // /* wait 100ms before rescheduling the action */\n // setTimeout(function () {\n // originalAction.schedule(counter + 1);\n // }, 100);\n // }, 1000);\n // ```\n this.id = this.recycleAsyncId(this.scheduler, this.id, null);\n }\n }\n\n protected _execute(state: T, _delay: number): any {\n let errored: boolean = false;\n let errorValue: any;\n try {\n this.work(state);\n } catch (e) {\n errored = true;\n // HACK: Since code elsewhere is relying on the \"truthiness\" of the\n // return here, we can't have it return \"\" or 0 or false.\n // TODO: Clean this up when we refactor schedulers mid-version-8 or so.\n errorValue = e ? e : new Error('Scheduled action threw falsy error');\n }\n if (errored) {\n this.unsubscribe();\n return errorValue;\n }\n }\n\n unsubscribe() {\n if (!this.closed) {\n const { id, scheduler } = this;\n const { actions } = scheduler;\n\n this.work = this.state = this.scheduler = null!;\n this.pending = false;\n\n arrRemove(actions, this);\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, null);\n }\n\n this.delay = null!;\n super.unsubscribe();\n }\n }\n}\n", "import { Action } from './scheduler/Action';\nimport { Subscription } from './Subscription';\nimport { SchedulerLike, SchedulerAction } from './types';\nimport { dateTimestampProvider } from './scheduler/dateTimestampProvider';\n\n/**\n * An execution context and a data structure to order tasks and schedule their\n * execution. 
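One practical consequence of the id-recycling logic above: the `Subscription` returned by `schedule` cancels the underlying timer. A minimal sketch using the public `asyncScheduler`:

```ts
import { asyncScheduler } from 'rxjs';

const sub = asyncScheduler.schedule(() => console.log('never runs'), 1000);
// Unsubscribing clears the interval before the work executes
// (recycleAsyncId is invoked with a null delay, as shown above).
sub.unsubscribe();
```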
Provides a notion of (potentially virtual) time, through the\n * `now()` getter method.\n *\n * Each unit of work in a Scheduler is called an `Action`.\n *\n * ```ts\n * class Scheduler {\n * now(): number;\n * schedule(work, delay?, state?): Subscription;\n * }\n * ```\n *\n * @class Scheduler\n * @deprecated Scheduler is an internal implementation detail of RxJS, and\n * should not be used directly. Rather, create your own class and implement\n * {@link SchedulerLike}. Will be made internal in v8.\n */\nexport class Scheduler implements SchedulerLike {\n public static now: () => number = dateTimestampProvider.now;\n\n constructor(private schedulerActionCtor: typeof Action, now: () => number = Scheduler.now) {\n this.now = now;\n }\n\n /**\n * A getter method that returns a number representing the current time\n * (at the time this function was called) according to the scheduler's own\n * internal clock.\n * @return {number} A number that represents the current time. May or may not\n * have a relation to wall-clock time. May or may not refer to a time unit\n * (e.g. milliseconds).\n */\n public now: () => number;\n\n /**\n * Schedules a function, `work`, for execution. May happen at some point in\n * the future, according to the `delay` parameter, if specified. May be passed\n * some context object, `state`, which will be passed to the `work` function.\n *\n * The given arguments will be processed and stored as an Action object in a\n * queue of actions.\n *\n * @param {function(state: ?T): ?Subscription} work A function representing a\n * task, or some unit of work to be executed by the Scheduler.\n * @param {number} [delay] Time to wait before executing the work, where the\n * time unit is implicit and defined by the Scheduler itself.\n * @param {T} [state] Some contextual data that the `work` function uses when\n * called by the Scheduler.\n * @return {Subscription} A subscription in order to be able to unsubscribe\n * the scheduled work.\n */\n public schedule<T>(work: (this: SchedulerAction<T>, state?: T) => void, delay: number = 0, state?: T): Subscription {\n return new this.schedulerActionCtor<T>(this, work).schedule(state, delay);\n }\n}\n", "import { Scheduler } from '../Scheduler';\nimport { Action } from './Action';\nimport { AsyncAction } from './AsyncAction';\nimport { TimerHandle } from './timerHandle';\n\nexport class AsyncScheduler extends Scheduler {\n public actions: Array<AsyncAction<any>> = [];\n /**\n * A flag to indicate whether the Scheduler is currently executing a batch of\n * queued actions.\n * @type {boolean}\n * @internal\n */\n public _active: boolean = false;\n /**\n * An internal ID used to track the latest asynchronous task such as those\n * coming from `setTimeout`, `setInterval`, `requestAnimationFrame`, and\n * others.\n * @type {any}\n * @internal\n */\n public _scheduled: TimerHandle | undefined;\n\n constructor(SchedulerAction: typeof Action, now: () => number = Scheduler.now) {\n super(SchedulerAction, now);\n }\n\n public flush(action: AsyncAction<any>): void {\n const { actions } = this;\n\n if (this._active) {\n actions.push(action);\n return;\n }\n\n let error: any;\n this._active = true;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = actions.shift()!)); // exhaust the scheduler queue\n\n this._active = false;\n\n if (error) {\n while ((action = actions.shift()!)) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from 
'./AsyncScheduler';\n\n/**\n *\n * Async Scheduler\n *\n * Schedule task as if you used setTimeout(task, duration)\n *\n * `async` scheduler schedules tasks asynchronously, by putting them on the JavaScript\n * event loop queue. It is best used to delay tasks in time or to schedule tasks repeating\n * in intervals.\n *\n * If you just want to \"defer\" task, that is to perform it right after currently\n * executing synchronous code ends (commonly achieved by `setTimeout(deferredTask, 0)`),\n * better choice will be the {@link asapScheduler} scheduler.\n *\n * ## Examples\n * Use async scheduler to delay task\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * const task = () => console.log('it works!');\n *\n * asyncScheduler.schedule(task, 2000);\n *\n * // After 2 seconds logs:\n * // \"it works!\"\n * ```\n *\n * Use async scheduler to repeat task in intervals\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * function task(state) {\n * console.log(state);\n * this.schedule(state + 1, 1000); // `this` references currently executing Action,\n * // which we reschedule with new state and delay\n * }\n *\n * asyncScheduler.schedule(task, 3000, 0);\n *\n * // Logs:\n * // 0 after 3s\n * // 1 after 4s\n * // 2 after 5s\n * // 3 after 6s\n * ```\n */\n\nexport const asyncScheduler = new AsyncScheduler(AsyncAction);\n\n/**\n * @deprecated Renamed to {@link asyncScheduler}. Will be removed in v8.\n */\nexport const async = asyncScheduler;\n", "import { AsyncAction } from './AsyncAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\nimport { SchedulerAction } from '../types';\nimport { animationFrameProvider } from './animationFrameProvider';\nimport { TimerHandle } from './timerHandle';\n\nexport class AnimationFrameAction extends AsyncAction {\n constructor(protected scheduler: AnimationFrameScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n protected requestAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle {\n // If delay is greater than 0, request as an async action.\n if (delay !== null && delay > 0) {\n return super.requestAsyncId(scheduler, id, delay);\n }\n // Push the action to the end of the scheduler queue.\n scheduler.actions.push(this);\n // If an animation frame has already been requested, don't request another\n // one. If an animation frame hasn't been requested yet, request one. Return\n // the current animation frame request id.\n return scheduler._scheduled || (scheduler._scheduled = animationFrameProvider.requestAnimationFrame(() => scheduler.flush(undefined)));\n }\n\n protected recycleAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle | undefined {\n // If delay exists and is greater than 0, or if the delay is null (the\n // action wasn't rescheduled) but was originally scheduled as an async\n // action, then recycle as an async action.\n if (delay != null ? 
delay > 0 : this.delay > 0) {\n return super.recycleAsyncId(scheduler, id, delay);\n }\n // If the scheduler queue has no remaining actions with the same async id,\n // cancel the requested animation frame and set the scheduled flag to\n // undefined so the next AnimationFrameAction will request its own.\n const { actions } = scheduler;\n if (id != null && actions[actions.length - 1]?.id !== id) {\n animationFrameProvider.cancelAnimationFrame(id as number);\n scheduler._scheduled = undefined;\n }\n // Return undefined so the action knows to request a new async id if it's rescheduled.\n return undefined;\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from './AsyncScheduler';\n\nexport class AnimationFrameScheduler extends AsyncScheduler {\n public flush(action?: AsyncAction): void {\n this._active = true;\n // The async id that effects a call to flush is stored in _scheduled.\n // Before executing an action, it's necessary to check the action's async\n // id to determine whether it's supposed to be executed in the current\n // flush.\n // Previous implementations of this method used a count to determine this,\n // but that was unsound, as actions that are unsubscribed - i.e. cancelled -\n // are removed from the actions array and that can shift actions that are\n // scheduled to be executed in a subsequent flush into positions at which\n // they are executed within the current flush.\n const flushId = this._scheduled;\n this._scheduled = undefined;\n\n const { actions } = this;\n let error: any;\n action = action || actions.shift()!;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = actions[0]) && action.id === flushId && actions.shift());\n\n this._active = false;\n\n if (error) {\n while ((action = actions[0]) && action.id === flushId && actions.shift()) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AnimationFrameAction } from './AnimationFrameAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\n\n/**\n *\n * Animation Frame Scheduler\n *\n * Perform task when `window.requestAnimationFrame` would fire\n *\n * When `animationFrame` scheduler is used with delay, it will fall back to {@link asyncScheduler} scheduler\n * behaviour.\n *\n * Without delay, `animationFrame` scheduler can be used to create smooth browser animations.\n * It makes sure scheduled task will happen just before next browser content repaint,\n * thus performing animations as efficiently as possible.\n *\n * ## Example\n * Schedule div height animation\n * ```ts\n * // html:
", "import { AnimationFrameAction } from './AnimationFrameAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\n\n/**\n *\n * Animation Frame Scheduler\n *\n * Perform a task when `window.requestAnimationFrame` would fire\n *\n * When the `animationFrame` scheduler is used with a delay, it falls back to {@link asyncScheduler}\n * behaviour.\n *\n * Without a delay, the `animationFrame` scheduler can be used to create smooth browser animations.\n * It makes sure a scheduled task happens just before the next browser content repaint,\n * thus performing animations as efficiently as possible.\n *\n * ## Example\n * Schedule div height animation\n * ```ts\n * // html: <div style=\"background: #0ff;\"></div>\n * import { animationFrameScheduler } from 'rxjs';\n *\n * const div = document.querySelector('div');\n *\n * animationFrameScheduler.schedule(function(height) {\n *   div.style.height = height + \"px\";\n *\n *   this.schedule(height + 1); // `this` references the currently executing Action,\n *                              // which we reschedule with new state\n * }, 0, 0);\n *\n * // You will see a div element growing in height\n * ```\n */\n\nexport const animationFrameScheduler = new AnimationFrameScheduler(AnimationFrameAction);\n\n/**\n * @deprecated Renamed to {@link animationFrameScheduler}. Will be removed in v8.\n */\nexport const animationFrame = animationFrameScheduler;\n", "import { Observable } from '../Observable';\nimport { SchedulerLike } from '../types';\n\n/**\n * A simple Observable that emits no items to the Observer and immediately\n * emits a complete notification.\n *\n * Just emits 'complete', and nothing else.\n *\n * ![](empty.png)\n *\n * A simple Observable that only emits the complete notification. It can be used\n * for composing with other Observables, such as in a {@link mergeMap}.\n *\n * ## Examples\n *\n * Log complete notification\n *\n * ```ts\n * import { EMPTY } from 'rxjs';\n *\n * EMPTY.subscribe({\n *   next: () => console.log('Next'),\n *   complete: () => console.log('Complete!')\n * });\n *\n * // Outputs\n * // Complete!\n * ```\n *\n * Emit the number 7, then complete\n *\n * ```ts\n * import { EMPTY, startWith } from 'rxjs';\n *\n * const result = EMPTY.pipe(startWith(7));\n * result.subscribe(x => console.log(x));\n *\n * // Outputs\n * // 7\n * ```\n *\n * Map and flatten only odd numbers to the sequence `'a'`, `'b'`, `'c'`\n *\n * ```ts\n * import { interval, mergeMap, of, EMPTY } from 'rxjs';\n *\n * const interval$ = interval(1000);\n * const result = interval$.pipe(\n *   mergeMap(x => x % 2 === 1 ? of('a', 'b', 'c') : EMPTY),\n * );\n * result.subscribe(x => console.log(x));\n *\n * // Results in the following output on the console:\n * // x is equal to the count on the interval, e.g. (0, 1, 2, 3, ...)\n * // x will occur every 1000ms\n * // if x % 2 is equal to 1, print a, b, c (each on its own line)\n * // if x % 2 is not equal to 1, nothing will be output\n * ```\n *\n * @see {@link Observable}\n * @see {@link NEVER}\n * @see {@link of}\n * @see {@link throwError}\n */\nexport const EMPTY = new Observable<never>((subscriber) => subscriber.complete());\n\n/**\n * @param scheduler A {@link SchedulerLike} to use for scheduling\n * the emission of the complete notification.\n * @deprecated Replaced with the {@link EMPTY} constant or {@link scheduled} (e.g. `scheduled([], scheduler)`). Will be removed in v8.\n */\nexport function empty(scheduler?: SchedulerLike) {\n  return scheduler ? emptyScheduled(scheduler) : EMPTY;\n}\n\nfunction emptyScheduled(scheduler: SchedulerLike) {\n  return new Observable<never>((subscriber) => scheduler.schedule(() => subscriber.complete()));\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport function isScheduler(value: any): value is SchedulerLike {\n  return value && isFunction(value.schedule);\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\nimport { isScheduler } from './isScheduler';\n\nfunction last<T>(arr: T[]): T | undefined {\n  return arr[arr.length - 1];\n}\n\nexport function popResultSelector(args: any[]): ((...args: unknown[]) => unknown) | undefined {\n  return isFunction(last(args)) ? args.pop() : undefined;\n}\n\nexport function popScheduler(args: any[]): SchedulerLike | undefined {\n  return isScheduler(last(args)) ? args.pop() : undefined;\n}\n\nexport function popNumber(args: any[], defaultValue: number): number {\n  return typeof last(args) === 'number' ? args.pop()! : defaultValue;\n}\n
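\n// Editor's note: the sketch below is an illustrative addition, not part of the original RxJS file;\n// makeTimer is a hypothetical factory showing how these helpers consume optional trailing arguments:\n//\n//   function makeTimer(...args: any[]) {\n//     const scheduler = popScheduler(args); // trailing SchedulerLike, if present\n//     const due = popNumber(args, 0);       // trailing number, else the default 0\n//     // ...whatever remains in `args` are the positional inputs\n//   }\n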
", "export const isArrayLike = (<T>(x: any): x is ArrayLike<T> => x && typeof x.length === 'number' && typeof x !== 'function');", "import { isFunction } from \"./isFunction\";\n\n/**\n * Tests to see if the object is \"thenable\".\n * @param value the object to test\n */\nexport function isPromise(value: any): value is PromiseLike<any> {\n  return isFunction(value?.then);\n}\n", "import { InteropObservable } from '../types';\nimport { observable as Symbol_observable } from '../symbol/observable';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being Observable (but not necessarily an Rx Observable) */\nexport function isInteropObservable(input: any): input is InteropObservable<any> {\n  return isFunction(input[Symbol_observable]);\n}\n", "import { isFunction } from './isFunction';\n\nexport function isAsyncIterable<T>(obj: any): obj is AsyncIterable<T> {\n  return Symbol.asyncIterator && isFunction(obj?.[Symbol.asyncIterator]);\n}\n", "/**\n * Creates the TypeError to throw if an invalid object is passed to `from` or `scheduled`.\n * @param input The object that was passed.\n */\nexport function createInvalidObservableTypeError(input: any) {\n  // TODO: We should create error codes that can be looked up, so this can be less verbose.\n  return new TypeError(\n    `You provided ${\n      input !== null && typeof input === 'object' ? 'an invalid object' : `'${input}'`\n    } where a stream was expected. You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.`\n  );\n}\n", "export function getSymbolIterator(): symbol {\n  if (typeof Symbol !== 'function' || !Symbol.iterator) {\n    return '@@iterator' as any;\n  }\n\n  return Symbol.iterator;\n}\n\nexport const iterator = getSymbolIterator();\n", "import { iterator as Symbol_iterator } from '../symbol/iterator';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being an Iterable */\nexport function isIterable(input: any): input is Iterable<any> {\n  return isFunction(input?.[Symbol_iterator]);\n}\n", "import { ReadableStreamLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport async function* readableStreamLikeToAsyncGenerator<T>(readableStream: ReadableStreamLike<T>): AsyncGenerator<T> {\n  const reader = readableStream.getReader();\n  try {\n    while (true) {\n      const { value, done } = await reader.read();\n      if (done) {\n        return;\n      }\n      yield value!;\n    }\n  } finally {\n    reader.releaseLock();\n  }\n}\n\nexport function isReadableStreamLike<T>(obj: any): obj is ReadableStreamLike<T> {\n  // We don't want to use instanceof checks because they would return\n  // false for instances from another Realm, like an