From 69e58b8ddf78cd582217185aa137e4fe35b2ba0c Mon Sep 17 00:00:00 2001 From: hadim Date: Sat, 9 Dec 2023 12:01:11 +0000 Subject: [PATCH] Deployed f442be0 to 0.9.7 with MkDocs 1.5.3 and mike 2.0.0 --- 0.9.7/404.html | 1503 ++ 0.9.7/api/molfeat.calc.html | 20431 ++++++++++++++++ 0.9.7/api/molfeat.plugins.html | 3679 +++ 0.9.7/api/molfeat.store.html | 4967 ++++ 0.9.7/api/molfeat.trans.base.html | 6109 +++++ 0.9.7/api/molfeat.trans.concat.html | 3517 +++ 0.9.7/api/molfeat.trans.fp.html | 3004 +++ 0.9.7/api/molfeat.trans.graph.html | 6596 +++++ 0.9.7/api/molfeat.trans.pretrained.base.html | 2666 ++ ...lfeat.trans.pretrained.dgl_pretrained.html | 3071 +++ 0.9.7/api/molfeat.trans.pretrained.fcd.html | 1798 ++ .../molfeat.trans.pretrained.graphormer.html | 2665 ++ ...feat.trans.pretrained.hf_transformers.html | 3944 +++ 0.9.7/api/molfeat.trans.struct.html | 4090 ++++ 0.9.7/api/molfeat.utils.html | 12413 ++++++++++ 0.9.7/api/molfeat.viz.html | 2215 ++ 0.9.7/assets/_mkdocstrings.css | 64 + 0.9.7/assets/css/custom-molfeat.css | 103 + 0.9.7/assets/css/custom.css | 33 + 0.9.7/assets/css/tweak-width.css | 16 + 0.9.7/assets/images/favicon.png | Bin 0 -> 1870 bytes .../assets/javascripts/bundle.d7c377c4.min.js | 29 + .../javascripts/bundle.d7c377c4.min.js.map | 7 + .../javascripts/lunr/min/lunr.ar.min.js | 1 + .../javascripts/lunr/min/lunr.da.min.js | 18 + .../javascripts/lunr/min/lunr.de.min.js | 18 + .../javascripts/lunr/min/lunr.du.min.js | 18 + .../javascripts/lunr/min/lunr.el.min.js | 1 + .../javascripts/lunr/min/lunr.es.min.js | 18 + .../javascripts/lunr/min/lunr.fi.min.js | 18 + .../javascripts/lunr/min/lunr.fr.min.js | 18 + .../javascripts/lunr/min/lunr.he.min.js | 1 + .../javascripts/lunr/min/lunr.hi.min.js | 1 + .../javascripts/lunr/min/lunr.hu.min.js | 18 + .../javascripts/lunr/min/lunr.hy.min.js | 1 + .../javascripts/lunr/min/lunr.it.min.js | 18 + .../javascripts/lunr/min/lunr.ja.min.js | 1 + .../javascripts/lunr/min/lunr.jp.min.js | 1 + 
.../javascripts/lunr/min/lunr.kn.min.js | 1 + .../javascripts/lunr/min/lunr.ko.min.js | 1 + .../javascripts/lunr/min/lunr.multi.min.js | 1 + .../javascripts/lunr/min/lunr.nl.min.js | 18 + .../javascripts/lunr/min/lunr.no.min.js | 18 + .../javascripts/lunr/min/lunr.pt.min.js | 18 + .../javascripts/lunr/min/lunr.ro.min.js | 18 + .../javascripts/lunr/min/lunr.ru.min.js | 18 + .../javascripts/lunr/min/lunr.sa.min.js | 1 + .../lunr/min/lunr.stemmer.support.min.js | 1 + .../javascripts/lunr/min/lunr.sv.min.js | 18 + .../javascripts/lunr/min/lunr.ta.min.js | 1 + .../javascripts/lunr/min/lunr.te.min.js | 1 + .../javascripts/lunr/min/lunr.th.min.js | 1 + .../javascripts/lunr/min/lunr.tr.min.js | 18 + .../javascripts/lunr/min/lunr.vi.min.js | 1 + .../javascripts/lunr/min/lunr.zh.min.js | 1 + 0.9.7/assets/javascripts/lunr/tinyseg.js | 206 + 0.9.7/assets/javascripts/lunr/wordcut.js | 6708 +++++ .../workers/search.f886a092.min.js | 42 + .../workers/search.f886a092.min.js.map | 7 + 0.9.7/assets/js/google-analytics.js | 10 + .../assets/stylesheets/main.45e1311d.min.css | 1 + .../stylesheets/main.45e1311d.min.css.map | 1 + .../stylesheets/palette.06af60db.min.css | 1 + .../stylesheets/palette.06af60db.min.css.map | 1 + 0.9.7/benchmark.html | 3341 +++ 0.9.7/community/contributions.html | 1707 ++ 0.9.7/community/get_involved.html | 1658 ++ 0.9.7/developers/contribute.html | 1785 ++ 0.9.7/developers/create-plugin.html | 1987 ++ 0.9.7/developers/register-plugin.html | 1728 ++ 0.9.7/images/logo-black.png | Bin 0 -> 18421 bytes 0.9.7/images/logo-black.svg | 5 + 0.9.7/images/logo-title.svg | 67 + 0.9.7/images/logo.png | Bin 0 -> 16917 bytes 0.9.7/images/logo.svg | 5 + 0.9.7/index.html | 1716 ++ 0.9.7/license.html | 1740 ++ 0.9.7/objects.inv | Bin 0 -> 4241 bytes 0.9.7/search/search_index.json | 1 + 0.9.7/sitemap.xml | 3 + 0.9.7/sitemap.xml.gz | Bin 0 -> 127 bytes 0.9.7/tutorials/add_your_own.html | 2755 +++ 0.9.7/tutorials/custom_model_store.html | 4053 +++ 0.9.7/tutorials/datacache.html 
| 2925 +++ 0.9.7/tutorials/graphs.html | 3627 +++ 0.9.7/tutorials/integrations.html | 2542 ++ 0.9.7/tutorials/pyg_integration.html | 2850 +++ 0.9.7/tutorials/save_and_load.html | 2385 ++ 0.9.7/tutorials/transformer_finetuning.html | 2958 +++ 0.9.7/tutorials/types_of_featurizers.html | 2882 +++ 0.9.7/usage.html | 1893 ++ versions.json | 5 + 92 files changed, 134802 insertions(+) create mode 100644 0.9.7/404.html create mode 100644 0.9.7/api/molfeat.calc.html create mode 100644 0.9.7/api/molfeat.plugins.html create mode 100644 0.9.7/api/molfeat.store.html create mode 100644 0.9.7/api/molfeat.trans.base.html create mode 100644 0.9.7/api/molfeat.trans.concat.html create mode 100644 0.9.7/api/molfeat.trans.fp.html create mode 100644 0.9.7/api/molfeat.trans.graph.html create mode 100644 0.9.7/api/molfeat.trans.pretrained.base.html create mode 100644 0.9.7/api/molfeat.trans.pretrained.dgl_pretrained.html create mode 100644 0.9.7/api/molfeat.trans.pretrained.fcd.html create mode 100644 0.9.7/api/molfeat.trans.pretrained.graphormer.html create mode 100644 0.9.7/api/molfeat.trans.pretrained.hf_transformers.html create mode 100644 0.9.7/api/molfeat.trans.struct.html create mode 100644 0.9.7/api/molfeat.utils.html create mode 100644 0.9.7/api/molfeat.viz.html create mode 100644 0.9.7/assets/_mkdocstrings.css create mode 100644 0.9.7/assets/css/custom-molfeat.css create mode 100644 0.9.7/assets/css/custom.css create mode 100644 0.9.7/assets/css/tweak-width.css create mode 100644 0.9.7/assets/images/favicon.png create mode 100644 0.9.7/assets/javascripts/bundle.d7c377c4.min.js create mode 100644 0.9.7/assets/javascripts/bundle.d7c377c4.min.js.map create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 
0.9.7/assets/javascripts/lunr/min/lunr.el.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.he.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.te.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 0.9.7/assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 0.9.7/assets/javascripts/lunr/tinyseg.js create mode 100644 
0.9.7/assets/javascripts/lunr/wordcut.js create mode 100644 0.9.7/assets/javascripts/workers/search.f886a092.min.js create mode 100644 0.9.7/assets/javascripts/workers/search.f886a092.min.js.map create mode 100644 0.9.7/assets/js/google-analytics.js create mode 100644 0.9.7/assets/stylesheets/main.45e1311d.min.css create mode 100644 0.9.7/assets/stylesheets/main.45e1311d.min.css.map create mode 100644 0.9.7/assets/stylesheets/palette.06af60db.min.css create mode 100644 0.9.7/assets/stylesheets/palette.06af60db.min.css.map create mode 100644 0.9.7/benchmark.html create mode 100644 0.9.7/community/contributions.html create mode 100644 0.9.7/community/get_involved.html create mode 100644 0.9.7/developers/contribute.html create mode 100644 0.9.7/developers/create-plugin.html create mode 100644 0.9.7/developers/register-plugin.html create mode 100644 0.9.7/images/logo-black.png create mode 100644 0.9.7/images/logo-black.svg create mode 100644 0.9.7/images/logo-title.svg create mode 100644 0.9.7/images/logo.png create mode 100644 0.9.7/images/logo.svg create mode 100644 0.9.7/index.html create mode 100644 0.9.7/license.html create mode 100644 0.9.7/objects.inv create mode 100644 0.9.7/search/search_index.json create mode 100644 0.9.7/sitemap.xml create mode 100644 0.9.7/sitemap.xml.gz create mode 100644 0.9.7/tutorials/add_your_own.html create mode 100644 0.9.7/tutorials/custom_model_store.html create mode 100644 0.9.7/tutorials/datacache.html create mode 100644 0.9.7/tutorials/graphs.html create mode 100644 0.9.7/tutorials/integrations.html create mode 100644 0.9.7/tutorials/pyg_integration.html create mode 100644 0.9.7/tutorials/save_and_load.html create mode 100644 0.9.7/tutorials/transformer_finetuning.html create mode 100644 0.9.7/tutorials/types_of_featurizers.html create mode 100644 0.9.7/usage.html diff --git a/0.9.7/404.html b/0.9.7/404.html new file mode 100644 index 0000000..01e5a28 --- /dev/null +++ b/0.9.7/404.html @@ -0,0 +1,1503 @@ + + + + + + + + + + + + 
+ + + + + + + + + molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ +

404 - Not found

+ +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.calc.html b/0.9.7/api/molfeat.calc.html new file mode 100644 index 0000000..c913d4d --- /dev/null +++ b/0.9.7/api/molfeat.calc.html @@ -0,0 +1,20431 @@ + + + + + + + + + + + + + + + + + + + + + + + + + molfeat.calc - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Calculators

+ + +
+ + + + +
+ + + +
+ + + + + + + + + + +
+ + + + +

+ get_calculator(name, **params) + +

+ + +
+ +

Get molecular calculator based on name

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
name + str + +
+

Name of the featurizer

+
+
+ required +
params + dict + +
+

Parameters of the featurizer

+
+
+ {} +
+ + + +

Raises:

+ + + + + + + + + + + + + +
TypeDescription
+ ValueError + +
+

When featurizer is not supported

+
+
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
featurizer + +
+

Callable

+
+
+ +
+ Source code in molfeat/calc/__init__.py +
10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
def get_calculator(name: str, **params):
+    """Get molecular calculator based on name
+
+    Args:
+        name: Name of the featurizer
+        params (dict): Parameters of the featurizer
+
+    Raises:
+        ValueError: When featurizer is not supported
+
+    Returns:
+        featurizer: Callable
+    """
+    if not isinstance(name, str):
+        return name
+
+    CALC_MAP = {k.lower(): v for k, v in _CALCULATORS.items()}
+    name = name.lower()
+    if name in FP_FUNCS.keys():
+        featurizer = FPCalculator(name, **params)
+    elif name == "desc3d":
+        featurizer = RDKitDescriptors3D(**params)
+    elif name == "desc2d":
+        featurizer = RDKitDescriptors2D(**params)
+    elif name == "mordred":
+        featurizer = MordredDescriptors(**params)
+    elif name == "cats":
+        featurizer = CATS(**params)
+    elif name == "cats2d":
+        params["use_3d_distances"] = False
+        featurizer = CATS(**params)
+    elif name == "cats3d":
+        params["use_3d_distances"] = True
+        featurizer = CATS(**params)
+    elif name == "pharm2d":
+        featurizer = Pharmacophore2D(**params)
+    elif name == "pharm3d":
+        featurizer = Pharmacophore3D(**params)
+    elif name.startswith("usr"):
+        params["method"] = name
+        featurizer = USRDescriptors(**params)
+    elif name == "electroshape":
+        featurizer = ElectroShapeDescriptors(**params)
+    elif name in ["scaffoldkeys", "skeys", "scaffkeys"]:
+        featurizer = ScaffoldKeyCalculator(**params)
+    elif name == "none":
+        featurizer = None
+    # for any generic calculator that has been automatically registered
+    elif name in CALC_MAP.keys():
+        featurizer = CALC_MAP[name](**params)
+    else:
+        raise ValueError(f"{name} is not a supported internal featurizer")
+    return featurizer
+
+
+
+ +
+ + + +
+ +
+ +

+

SerializableCalculator is the abstract base class for implementing your own calculators.

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ SerializableCalculator + + +

+ + +
+

+ Bases: ABC

+ + +

Interface to define a serializable calculator

+
+Subclassing SerializableCalculator +

When subclassing a calculator, you must implement the __call__ method. +If your calculator also implements a batch_compute method, it will be used +by MoleculeTransformer to accelerate featurization.

+
from molfeat.calc import SerializableCalculator
+
+class MyCalculator(SerializableCalculator):
+
+    def __call__(self, mol, **kwargs):
+        # you have to implement this
+        ...
+
+    def __len__(self):
+        # you don't have to implement this but are encouraged to do so
+        # this is used to determine the length of the output
+        ...
+
+    @property
+    def columns(self):
+        # you don't have to implement this
+        # use this to return the name of each entry returned by your featurizer
+        ...
+
+    def batch_compute(self, mols:list, **dm_parallelized_kwargs):
+        # you don't need to implement this
+        # but you should if there is an efficient batching process
+        # By default dm.parallelized arguments will also be passed as input
+        ...
+
+
+ +
+ Source code in molfeat/calc/base.py +
 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
class SerializableCalculator(abc.ABC, metaclass=_CalculatorMeta):
+    """Interface to define a serializable calculator
+
+    ???+ tip "Subclassing SerializableCalculator"
+        When subclassing a calculator, you must implement the __call__ method.
+        If your calculator also implements a `batch_compute` method, it will be used
+        by `MoleculeTransformer` to accelerate featurization.
+
+        ```python
+        from molfeat.calc import SerializableCalculator
+
+        class MyCalculator(SerializableCalculator):
+
+            def __call__(self, mol, **kwargs):
+                # you have to implement this
+                ...
+
+            def __len__(self):
+                # you don't have to implement this but are encouraged to do so
+                # this is used to determine the length of the output
+                ...
+
+            @property
+            def columns(self):
+                # you don't have to implement this
+                # use this to return the name of each entry returned by your featurizer
+                ...
+
+            def batch_compute(self, mols:list, **dm_parallelized_kwargs):
+                # you don't need to implement this
+                # but you should if there is an efficient batching process
+                # By default dm.parallelized arguments will also be passed as input
+                ...
+        ```
+    """
+
+    @abc.abstractmethod
+    def __call__(self, *args, **kwargs):
+        pass
+
+    @classmethod
+    def from_state_dict(cls, state: dict, override_args: Optional[dict] = None):
+        """Load from state dictionary
+
+        Args:
+            state: dictionary to use to create the the calculator
+            overrride_args: optional dictionary of arguments to override the ones in the state dict
+                at construction of the new object
+        """
+        cls_name = state.get("name", cls.__name__)
+        module_name = state.get("module", cls.__module__)
+        module = importlib.import_module(module_name)
+        klass = getattr(module, cls_name)
+        kwargs = state["args"].copy()
+        kwargs.update(**(override_args or {}))
+        return klass(**kwargs)
+
+    def to_state_dict(self):
+        """Get the state dictionary"""
+        state_dict = {}
+        state_dict["name"] = self.__class__.__name__
+        state_dict["module"] = self.__class__.__module__
+        state_dict["args"] = self.__getstate__()
+        state_dict["_molfeat_version"] = MOLFEAT_VERSION
+        # we would like to keep input arguments as is.
+        signature = inspect.signature(self.__init__)
+        val = {k: v.default for k, v in signature.parameters.items()}
+        to_remove = [k for k in state_dict["args"] if k not in val.keys()]
+        for k in to_remove:
+            state_dict["args"].pop(k)
+        return state_dict
+
+    def to_state_json(self) -> str:
+        """Output this instance as a JSON representation"""
+        return json.dumps(self.to_state_dict())
+
+    def to_state_yaml(self) -> str:
+        """Output this instance as a YAML representation"""
+        return yaml.dump(self.to_state_dict(), Dumper=yaml.SafeDumper)
+
+    def to_state_json_file(self, filepath: str):
+        """Save the state of this instance as a JSON file"""
+        with fsspec.open(filepath, "w") as f:
+            f.write(self.to_state_json())  # type: ignore
+
+    def to_state_yaml_file(self, filepath: str):
+        """Save the state of this instance as a YAML file"""
+        with fsspec.open(filepath, "w") as f:
+            f.write(self.to_state_yaml())  # type: ignore
+
+    @classmethod
+    def from_state_json(
+        cls,
+        state_json: str,
+        override_args: Optional[dict] = None,
+    ):
+        state_dict = yaml.safe_load(state_json)
+        return cls.from_state_dict(state_dict, override_args=override_args)
+
+    @classmethod
+    def from_state_yaml(
+        cls,
+        state_yaml: str,
+        override_args: Optional[dict] = None,
+    ):
+        state_dict = yaml.load(state_yaml, Loader=yaml.SafeLoader)
+        return cls.from_state_dict(state_dict, override_args=override_args)
+
+    @classmethod
+    def from_state_json_file(
+        cls,
+        filepath: str,
+        override_args: Optional[dict] = None,
+    ):
+        with fsspec.open(filepath, "r") as f:
+            featurizer = cls.from_state_json(f.read(), override_args=override_args)  # type: ignore
+        return featurizer
+
+    @classmethod
+    def from_state_yaml_file(
+        cls,
+        filepath: str,
+        override_args: Optional[dict] = None,
+    ):
+        with fsspec.open(filepath, "r") as f:
+            featurizer = cls.from_state_yaml(f.read(), override_args=override_args)  # type: ignore
+        return featurizer
+
+    @classmethod
+    def from_state_file(
+        cls,
+        state_path: str,
+        override_args: Optional[dict] = None,
+    ):
+        if state_path.endswith("yaml") or state_path.endswith("yml"):
+            return cls.from_state_yaml_file(filepath=state_path, override_args=override_args)
+        elif state_path.endswith("json"):
+            return cls.from_state_json_file(filepath=state_path, override_args=override_args)
+        elif state_path.endswith("pkl"):
+            with fsspec.open(state_path, "rb") as IN:
+                return joblib.load(IN)
+        raise ValueError(
+            "Only files with 'yaml' or 'json' format are allowed. "
+            "The filename must be ending with `yaml`, 'yml' or 'json'."
+        )
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ from_state_dict(state, override_args=None) + + + classmethod + + +

+ + +
+ +

Load from state dictionary

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
state + dict + +
+

dictionary to use to create the calculator

+
+
+ required +
override_args + 
+

optional dictionary of arguments to override the ones in the state dict +at construction of the new object

+
+
+ required +
+ +
+ Source code in molfeat/calc/base.py +
71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
@classmethod
+def from_state_dict(cls, state: dict, override_args: Optional[dict] = None):
+    """Load from state dictionary
+
+    Args:
+        state: dictionary to use to create the the calculator
+        overrride_args: optional dictionary of arguments to override the ones in the state dict
+            at construction of the new object
+    """
+    cls_name = state.get("name", cls.__name__)
+    module_name = state.get("module", cls.__module__)
+    module = importlib.import_module(module_name)
+    klass = getattr(module, cls_name)
+    kwargs = state["args"].copy()
+    kwargs.update(**(override_args or {}))
+    return klass(**kwargs)
+
+
+
+ +
+ + +
+ + + + +

+ to_state_dict() + +

+ + +
+ +

Get the state dictionary

+ +
+ Source code in molfeat/calc/base.py +
 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
def to_state_dict(self):
+    """Get the state dictionary"""
+    state_dict = {}
+    state_dict["name"] = self.__class__.__name__
+    state_dict["module"] = self.__class__.__module__
+    state_dict["args"] = self.__getstate__()
+    state_dict["_molfeat_version"] = MOLFEAT_VERSION
+    # we would like to keep input arguments as is.
+    signature = inspect.signature(self.__init__)
+    val = {k: v.default for k, v in signature.parameters.items()}
+    to_remove = [k for k in state_dict["args"] if k not in val.keys()]
+    for k in to_remove:
+        state_dict["args"].pop(k)
+    return state_dict
+
+
+
+ +
+ + +
+ + + + +

+ to_state_json() + +

+ + +
+ +

Output this instance as a JSON representation

+ +
+ Source code in molfeat/calc/base.py +
103
+104
+105
def to_state_json(self) -> str:
+    """Output this instance as a JSON representation"""
+    return json.dumps(self.to_state_dict())
+
+
+
+ +
+ + +
+ + + + +

+ to_state_json_file(filepath) + +

+ + +
+ +

Save the state of this instance as a JSON file

+ +
+ Source code in molfeat/calc/base.py +
111
+112
+113
+114
def to_state_json_file(self, filepath: str):
+    """Save the state of this instance as a JSON file"""
+    with fsspec.open(filepath, "w") as f:
+        f.write(self.to_state_json())  # type: ignore
+
+
+
+ +
+ + +
+ + + + +

+ to_state_yaml() + +

+ + +
+ +

Output this instance as a YAML representation

+ +
+ Source code in molfeat/calc/base.py +
107
+108
+109
def to_state_yaml(self) -> str:
+    """Output this instance as a YAML representation"""
+    return yaml.dump(self.to_state_dict(), Dumper=yaml.SafeDumper)
+
+
+
+ +
+ + +
+ + + + +

+ to_state_yaml_file(filepath) + +

+ + +
+ +

Save the state of this instance as a YAML file

+ +
+ Source code in molfeat/calc/base.py +
116
+117
+118
+119
def to_state_yaml_file(self, filepath: str):
+    """Save the state of this instance as a YAML file"""
+    with fsspec.open(filepath, "w") as f:
+        f.write(self.to_state_yaml())  # type: ignore
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +

+

Fingerprints

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ FPCalculator + + +

+ + +
+

+ Bases: SerializableCalculator

+ + +

Fingerprint bit calculator for a molecule

+ +
+ Source code in molfeat/calc/fingerprints.py +
198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
class FPCalculator(SerializableCalculator):
+    """Fingerprint bit calculator for a molecule"""
+
+    def __init__(
+        self,
+        method: str,
+        length: Optional[int] = None,
+        counting: bool = False,
+        **method_params,
+    ):
+        """Compute the given fingeprint for a molecule
+
+        !!! note
+            For efficiency reason, count fingerprints are hashed and potentially
+            re-folded and the count corresponds to the number of bits set to true
+
+        Args:
+            method (str): Name of the fingerprint method to use. See FPCalculator.available_fingerprints() for a list
+            length (int, optional): Length of the fingerprint. Defaults to None.
+                The default corresponds to the fingerpint default.
+            counting (bool, optional): Whether to use the count version of the fingerprint
+            method_params (dict): any parameters to the fingerprint algorithm.
+                See FPCalculator.default_parameters(method) for all the parameters required by a given method.
+        """
+        self.method = method.lower()
+        self.counting = counting or "-count" in self.method
+        if self.counting and "-count" not in self.method:
+            self.method = self.method + "-count"
+        self.input_length = length
+        if self.method not in FP_FUNCS:
+            raise ValueError(f"Method {self.method} is not a supported featurizer")
+        default_params = copy.deepcopy(FP_DEF_PARAMS[method])
+        unknown_params = set(method_params.keys()).difference(set(default_params.keys()))
+        if unknown_params:
+            logger.error(f"Params: {unknown_params} are not valid for {method}")
+        self.params = default_params
+        self.params.update(
+            {k: method_params[k] for k in method_params if k in default_params.keys()}
+        )
+        self._length = self._set_length(length)
+
+    @staticmethod
+    def available_fingerprints():
+        """Get the list of available fingerprints"""
+        return list(FP_FUNCS.keys())
+
+    @staticmethod
+    def default_parameters(method: str):
+        """Get the default parameters for a given fingerprint method
+
+        Args:
+            method: name of the fingerprint method
+        """
+        return FP_DEF_PARAMS[method].copy()
+
+    @property
+    def columns(self):
+        """
+        Get the name of all the descriptors of this calculator
+        """
+        return [f"fp_{i}" for i in range(self._length)]
+
+    def __len__(self):
+        """Return the length of the calculator"""
+        return self._length
+
+    def _set_length(self, length=None):
+        """Get the length of the featurizer"""
+        fplen = length
+        len_key = None
+        if self.method == "maccs":
+            fplen = 167
+        elif self.method == "estate":
+            fplen = 79
+        elif self.method == "erg":
+            fplen = 315
+        elif self.method == "rdkit-count" and not fplen:
+            fplen = 2048
+        elif "nBits" in self.params.keys():
+            len_key = "nBits"
+            fplen = self.params[len_key]
+        elif "n_permutations" in self.params.keys():
+            # special case for mhfp
+            len_key = "n_permutations"
+            fplen = self.params[len_key]
+        elif "fpSize" in self.params.keys():
+            len_key = "fpSize"
+            fplen = self.params[len_key]
+        elif "dimensions" in self.params.keys():
+            len_key = "dimensions"
+            fplen = self.params[len_key]
+        if len_key is not None and length:
+            self.params[len_key] = length
+            fplen = length
+        return fplen
+
+    def __call__(self, mol: Union[dm.Mol, str], raw: bool = False):
+        r"""
+        Compute the Fingerprint of a molecule
+
+        Args:
+            mol: the molecule of interest
+            raw: whether to keep original datatype or convert to numpy. Useful for rdkit's similarity functions
+
+        Returns:
+            props (np.ndarray): list of computed rdkit molecular descriptors
+        """
+        mol = dm.to_mol(mol)
+        fp_val = FP_FUNCS[self.method](mol, **self.params)
+        if self.counting:
+            fp_val = fold_count_fp(fp_val, self._length)
+        if not raw:
+            fp_val = to_numpy(fp_val)
+        if self.counting and raw:
+            # converint the counted values to SparseInt again
+            fp_val = to_fp(fp_val, bitvect=False)
+        return fp_val
+
+    def __getstate__(self):
+        # EN: note that the state is standardized with all the parameter
+        # because of the possibility of default changing after
+        state = {}
+        state["length"] = self.input_length
+        state["input_length"] = self.input_length
+        state["method"] = self.method
+        state["counting"] = self.counting
+        state["params"] = self.params
+        return state
+
+    def __setstate__(self, state: dict):
+        """Set the state of the featurizer"""
+        self.__dict__.update(state)
+        self._length = self._set_length(self.input_length)
+
+    def to_state_dict(self):
+        """Get the state dictionary"""
+        state_dict = super().to_state_dict()
+        cur_params = self.params
+        default_params = copy.deepcopy(FP_DEF_PARAMS[state_dict["args"]["method"]])
+        state_dict["args"].update(
+            {
+                k: cur_params[k]
+                for k in cur_params
+                if (cur_params[k] != default_params[k] and cur_params[k] is not None)
+            }
+        )
+        # we want to keep all the additional parameters in the state dict
+        return state_dict
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ columns + + + property + + +

+ + +
+ +

Get the name of all the descriptors of this calculator

+
+ +
+ + + + +
+ + + + +

+ __call__(mol, raw=False) + +

+ + +
+ +

Compute the Fingerprint of a molecule

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Union[Mol, str] + +
+

the molecule of interest

+
+
+ required +
raw + bool + +
+

whether to keep original datatype or convert to numpy. Useful for rdkit's similarity functions

+
+
+ False +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
props + ndarray + +
+

computed fingerprint of the molecule (converted to a numpy array unless raw is True)

+
+
+ +
+ Source code in molfeat/calc/fingerprints.py +
294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
def __call__(self, mol: Union[dm.Mol, str], raw: bool = False):
+    r"""
+    Compute the Fingerprint of a molecule
+
+    Args:
+        mol: the molecule of interest
+        raw: whether to keep original datatype or convert to numpy. Useful for rdkit's similarity functions
+
+    Returns:
+        props (np.ndarray): list of computed rdkit molecular descriptors
+    """
+    mol = dm.to_mol(mol)
+    fp_val = FP_FUNCS[self.method](mol, **self.params)
+    if self.counting:
+        fp_val = fold_count_fp(fp_val, self._length)
+    if not raw:
+        fp_val = to_numpy(fp_val)
+    if self.counting and raw:
+        # converint the counted values to SparseInt again
+        fp_val = to_fp(fp_val, bitvect=False)
+    return fp_val
+
+
+
+ +
+ + +
+ + + + +

+ __init__(method, length=None, counting=False, **method_params) + +

+ + +
+ +

Compute the given fingerprint for a molecule

+
+

Note

+

For efficiency reasons, count fingerprints are hashed and potentially +re-folded, and the count corresponds to the number of bits set to true

+
+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
method + str + +
+

Name of the fingerprint method to use. See FPCalculator.available_fingerprints() for a list

+
+
+ required +
length + int + +
+

Length of the fingerprint. Defaults to None. +The default corresponds to the fingerprint default.

+
+
+ None +
counting + bool + +
+

Whether to use the count version of the fingerprint

+
+
+ False +
method_params + dict + +
+

any parameters to the fingerprint algorithm. +See FPCalculator.default_parameters(method) for all the parameters required by a given method.

+
+
+ {} +
+ +
+ Source code in molfeat/calc/fingerprints.py +
201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
def __init__(
+    self,
+    method: str,
+    length: Optional[int] = None,
+    counting: bool = False,
+    **method_params,
+):
+    """Compute the given fingeprint for a molecule
+
+    !!! note
+        For efficiency reason, count fingerprints are hashed and potentially
+        re-folded and the count corresponds to the number of bits set to true
+
+    Args:
+        method (str): Name of the fingerprint method to use. See FPCalculator.available_fingerprints() for a list
+        length (int, optional): Length of the fingerprint. Defaults to None.
+            The default corresponds to the fingerpint default.
+        counting (bool, optional): Whether to use the count version of the fingerprint
+        method_params (dict): any parameters to the fingerprint algorithm.
+            See FPCalculator.default_parameters(method) for all the parameters required by a given method.
+    """
+    self.method = method.lower()
+    self.counting = counting or "-count" in self.method
+    if self.counting and "-count" not in self.method:
+        self.method = self.method + "-count"
+    self.input_length = length
+    if self.method not in FP_FUNCS:
+        raise ValueError(f"Method {self.method} is not a supported featurizer")
+    default_params = copy.deepcopy(FP_DEF_PARAMS[method])
+    unknown_params = set(method_params.keys()).difference(set(default_params.keys()))
+    if unknown_params:
+        logger.error(f"Params: {unknown_params} are not valid for {method}")
+    self.params = default_params
+    self.params.update(
+        {k: method_params[k] for k in method_params if k in default_params.keys()}
+    )
+    self._length = self._set_length(length)
+
+
+
+ +
+ + +
+ + + + +

+ __len__() + +

+ + +
+ +

Return the length of the calculator

+ +
+ Source code in molfeat/calc/fingerprints.py +
260
+261
+262
def __len__(self):
+    """Return the length of the calculator"""
+    return self._length
+
+
+
+ +
+ + +
+ + + + +

+ __setstate__(state) + +

+ + +
+ +

Set the state of the featurizer

+ +
+ Source code in molfeat/calc/fingerprints.py +
327
+328
+329
+330
def __setstate__(self, state: dict):
+    """Set the state of the featurizer"""
+    self.__dict__.update(state)
+    self._length = self._set_length(self.input_length)
+
+
+
+ +
+ + +
+ + + + +

+ available_fingerprints() + + + staticmethod + + +

+ + +
+ +

Get the list of available fingerprints

+ +
+ Source code in molfeat/calc/fingerprints.py +
239
+240
+241
+242
@staticmethod
+def available_fingerprints():
+    """Get the list of available fingerprints"""
+    return list(FP_FUNCS.keys())
+
+
+
+ +
+ + +
+ + + + +

+ default_parameters(method) + + + staticmethod + + +

+ + +
+ +

Get the default parameters for a given fingerprint method

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
method + str + +
+

name of the fingerprint method

+
+
+ required +
+ +
+ Source code in molfeat/calc/fingerprints.py +
244
+245
+246
+247
+248
+249
+250
+251
@staticmethod
+def default_parameters(method: str):
+    """Get the default parameters for a given fingerprint method
+
+    Args:
+        method: name of the fingerprint method
+    """
+    return FP_DEF_PARAMS[method].copy()
+
+
+
+ +
+ + +
+ + + + +

+ to_state_dict() + +

+ + +
+ +

Get the state dictionary

+ +
+ Source code in molfeat/calc/fingerprints.py +
332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
def to_state_dict(self):
+    """Get the state dictionary"""
+    state_dict = super().to_state_dict()
+    cur_params = self.params
+    default_params = copy.deepcopy(FP_DEF_PARAMS[state_dict["args"]["method"]])
+    state_dict["args"].update(
+        {
+            k: cur_params[k]
+            for k in cur_params
+            if (cur_params[k] != default_params[k] and cur_params[k] is not None)
+        }
+    )
+    # we want to keep all the additional parameters in the state dict
+    return state_dict
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +

+

CATS

+ + +
+ + + + +
+ +

CATS 2D and 3D implementation based on original work by +Rajarshi Guha rguha@indiana.edu 08/26/07 and Chris Arthur 1/11/2015 RDKit port. +This version modernizes the code, improves performance, and adds support for 3D distances +as well as distance binning. +See: https://masterchemoinfo.u-strasbg.fr/Documents/Conferences/Lecture1_Pharmacophores_Schneider.pdf

+ + + +
+ + + + + + + + +
+ + + + +

+ CATS + + +

+ + +
+

+ Bases: SerializableCalculator

+ + +

Cats descriptors calculator based on PPPs (potential pharmacophore points). Can be either 2D or 3D.

+

Note: + We need to consider all pairwise combinations of the 6 PPPs described in CATS.SMARTS + which would be $P(6,2) + 6$. However, as we only consider lexicographic order, the total size + is then $\frac{P(6,2)}{2} + 6 = 21$, explaining the size of CATS.DESCRIPTORS

+
+

Tip

+

The CATS descriptors are sensitive to the number of atoms in a molecule, meaning you would get different +results if you add or remove hydrogen atoms

+
+ +
+ Source code in molfeat/calc/cats.py +
 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
class CATS(SerializableCalculator):
+    """Cats descriptors calculator based on PPPs (potential pharmacophore points). Can be either 2D or 3D.
+
+    !!! note:
+        We need to consider all pairwise combination of the 6 PPPs described in `CATS2D.SMARTS`
+        which would be $P(6,2) + 6$. However, as we only consider lexicographic order, the total size
+        is then $\frac{P(6,2)}{2} + 6 = 21$, explaining the size of `CATS2D.DESCRIPTORS`
+
+    !!! tip
+        The CATS descriptor are sensitive to the number of atoms in a molecule, meaning, you would get different
+        results if you add or remove hydrogen atoms
+
+    """
+
+    SMARTS = {
+        "D": ["[!$([#6,H0,-,-2,-3])]"],
+        "A": ["[!$([#6,F,Cl,Br,I,o,s,nX3,#7v5,#15v5,#16v4,#16v6,*+1,*+2,*+3])]"],
+        "P": ["[*+]", "[#7H2]"],
+        "N": ["[*-]", "[C&$(C(=O)O),P&$(P(=O)),S&$(S(=O)O)]"],
+        "L": [
+            "[Cl,Br,I]",
+            "[S;D2;$(S(C)(C))]",
+            "[C;D2;$(C(=C)(=C))]",
+            "[C;D3;$(C(=C)(C)(C))]",
+            "[C;D4;$(C(C)(C)(C)(C))]",
+            "[C;D3;H1;$(C(C)(C)(C))]",
+            "[C;D2;H2;$(C(C)(C))]",
+        ],
+        "R": ["[a]"],
+    }
+
+    DESCRIPTORS = [
+        "DD",
+        "AD",
+        "DP",
+        "DN",
+        "DL",
+        "DR",
+        "AA",
+        "AP",
+        "AN",
+        "AL",
+        "AR",
+        "PP",
+        "NP",
+        "LP",
+        "PR",
+        "NN",
+        "LN",
+        "NR",
+        "LL",
+        "LR",
+        "RR",
+    ]
+
+    MAX_DIST_DEFAULT_2D = 8
+    MAX_DIST_DEFAULT_3D = 5
+
+    def __init__(
+        self,
+        max_dist: Union[float, int] = None,
+        bins: List[int] = None,
+        scale: str = "raw",
+        use_3d_distances: bool = False,
+        **kwargs,
+    ):
+        """Calculator for the CATS descriptors.
+
+        `max_dist` and `bins` will both determine the length of the fingerprint vector,
+        which you can get by calling `len(calc)`
+
+        Args:
+            max_dist: Maximum distance between pairs. When set to None, the default for 2D is
+                set to `max_dist=8` and for 3D to `max_dist=5`.
+            bins: Bins to use. Defaults to equal spacing `[0, max_dist[`.
+            scale: How to scale the values. Supported values are:
+                 - 'raw' for the raw values.
+                 - 'num' for values normalized by the number of atoms.
+                 - 'count' for scaling based on occurence of the PPP.
+            use_3d_distances: Whether to use the 3D distances instead of the topological distances.
+                If set to True, the input molecules must contain a conformer.
+            kwargs: silently ignored extra parameters for compatibility with other calculators.
+        """
+
+        # Set the max_dist default is set to None
+        if max_dist is None:
+            if use_3d_distances:
+                max_dist = CATS.MAX_DIST_DEFAULT_3D
+            else:
+                max_dist = CATS.MAX_DIST_DEFAULT_2D
+
+        self.max_dist = max_dist
+        self.use_3d_distances = use_3d_distances
+
+        if bins is None:
+            bins = list(np.arange(1, np.floor(self.max_dist + 1), 1))
+
+        # we don't allow interaction that exceed our distance threshold.
+        bins = [x for x in bins if x <= self.max_dist]
+
+        # we start distance indexing at 0
+        if 0 not in bins:
+            bins += [0]
+
+        self.bins = list(sorted(bins))
+
+        self.scale = scale
+
+        self._set_columns()
+
+    def _set_columns(self):
+        self._columns = []
+        for label in self.DESCRIPTORS:
+            for i in range(len(self.bins)):
+                self._columns.append(f"{label}.bins-{i}")
+
+    @classmethod
+    @functools.lru_cache(maxsize=None)
+    def _pattern_to_mols(cls, smarts_dict=None):
+        """Convert dict of list of smarts to rdkit molecules"""
+
+        if smarts_dict is None:
+            smarts_dict = cls.SMARTS
+
+        smarts_mols = ddict(list)
+        for label, patterns in smarts_dict.items():
+            patterns = [dm.from_smarts(patt) for patt in patterns]
+            smarts_mols[label] = patterns
+
+        return smarts_mols
+
+    def _get_pcore_group(self, mol: Union[dm.Mol, str]):
+        """
+        Assign a PPP (potential pharmacophore points) to individual atoms of a molecule.
+
+        !!! note
+            The return value is a list of length `N_atoms` of the
+            input molecule. The i'th element of the list contains
+            a list of PPP labels that were identified for the i'th atom
+
+        Args:
+            mol: the molecule of interest
+
+        Returns:
+            ppp_labels (List[list]): list of all PPP labels for each atoms
+        """
+
+        smarts_mols = CATS._pattern_to_mols()
+
+        ppp_labels = ["" for x in range(0, mol.GetNumAtoms())]
+        for label, patterns in smarts_mols.items():
+            for pattern in patterns:
+                matched = False
+                for matchbase in mol.GetSubstructMatches(pattern, uniquify=True):
+                    for idx in matchbase:
+                        if ppp_labels[idx] == "":
+                            ppp_labels[idx] = [label]
+                        else:
+                            tmp = ppp_labels[idx]
+                            tmp.append(label)
+                            ppp_labels[idx] = tmp
+                    matched = True
+                if matched:
+                    break
+        return ppp_labels
+
+    def _get_ppp_matrix(self, n_atoms: int, ppp_labels: List):
+        """Compute PPP matrix from label list
+
+        Args:
+            n_atoms (int): number of atoms
+            ppp_labels (list): PPP labels returned by
+
+        Returns:
+            pppm (dict): PPP matrix where the keys are the coordinate
+        """
+
+        pppm = {}
+        for i in range(0, n_atoms):
+            ppp_i = ppp_labels[i]
+            if ppp_i == "":
+                continue
+            for j in range(0, n_atoms):
+                ppp_j = ppp_labels[j]
+                if ppp_j == "":
+                    continue
+                pairs = []
+                for x in ppp_i:
+                    for y in ppp_j:
+                        if (x, y) not in pairs and (y, x) not in pairs:
+                            ## make sure to add the labels in increasing
+                            ## lexicographical order
+                            if x < y:
+                                tmp = (x, y)
+                            else:
+                                tmp = (y, x)
+                            pairs.append(tmp)
+                pppm[(i, j)] = pairs
+        return pppm
+
+    def _calculate(self, mol, dist_mat):
+        """Calculate the CATS2D descriptors for current molecule, given a distance matrix"""
+
+        n_atoms = mol.GetNumAtoms()
+        ppp_labels = self._get_pcore_group(mol)
+        ppp_mat = self._get_ppp_matrix(n_atoms, ppp_labels)
+
+        # get the counturence of each of the PPP's
+        ppp_count = dict(zip(["D", "N", "A", "P", "L", "R"], [0] * 6))
+        for label in ppp_labels:
+            for ppp in label:
+                ppp_count[ppp] = ppp_count[ppp] + 1
+
+        # lets calculate the CATS2D raw descriptor
+        # bins: a, b, c ==> [a, b], [b, c], [c, *]
+        # a is always 0
+        desc = [[0 for x in range(len(self.bins))] for x in range(0, len(self.DESCRIPTORS))]
+        for (x, y), labels in ppp_mat.items():
+            dist = dist_mat[x, y]
+            # ignore all interactions greater than the max distance we set
+            # we cannot have negative distance
+            if dist > self.max_dist or dist < 0:
+                continue
+
+            for pair in labels:
+                idx = self.DESCRIPTORS.index(f"{pair[0]}{pair[1]}")
+                vals = desc[idx]
+                dist_bin = np.digitize(dist, self.bins)
+                # indexing at 0
+                vals[dist_bin - 1] += 1
+                desc[idx] = vals
+
+        if self.scale == "num":
+            for row in range(0, len(desc)):
+                for col in range(0, len(desc[0])):
+                    desc[row][col] = float(desc[row][col]) / n_atoms
+
+        elif self.scale == "count":
+            #  get the scaling factors
+            facs = [0] * len(self.DESCRIPTORS)
+            count = 0
+            for ppp in self.DESCRIPTORS:
+                facs[count] = ppp_count[ppp[0]] + ppp_count[ppp[1]]
+                count += 1
+
+            # each row in desc corresponds to a PPP pair
+            # so the scale factor is constant over cols of a row
+            count = 0
+            for i in range(0, len(desc)):
+                if facs[i] == 0:
+                    continue
+                for j in range(0, len(desc[0])):
+                    desc[i][j] = desc[i][j] / float(facs[i])
+
+        res = []
+        for row in desc:
+            for col in row:
+                res.append(col)
+        return res
+
+    def __len__(self):
+        """Return the length of the calculator"""
+        return len(self._columns)
+
+    def __call__(self, mol: Union[dm.Mol, str], conformer_id: int = -1):
+        """Get CATS 2D descriptors for a molecule
+
+        Args:
+            mol: the molecule of interest.
+            conformer_id: Optional conformer id. Only relevant when `use_3d_distances`
+                is set to True.
+
+        Returns:
+            props (np.ndarray): list of computed rdkit molecular descriptors
+        """
+
+        mol = dm.to_mol(mol)
+
+        if self.use_3d_distances:
+            if mol.GetNumConformers() < 1:  # type: ignore
+                raise ValueError("Expected a molecule with conformers information.")
+
+            dist_mat = Get3DDistanceMatrix(mol, confId=conformer_id)
+
+        else:
+            dist_mat = GetDistanceMatrix(mol).astype(int)
+
+        out = self._calculate(mol, dist_mat)
+        return to_numpy(out)
+
+    @property
+    def columns(self):
+        """Get the descriptors columns"""
+        return self._columns
+
+    def __getstate__(self):
+        """Serialize the class for pickling."""
+        state = {}
+        state["max_dist"] = self.max_dist
+        state["bins"] = self.bins
+        state["scale"] = self.scale
+        state["use_3d_distances"] = self.use_3d_distances
+        return state
+
+    def __setstate__(self, state: dict):
+        """Reload the class from pickling."""
+        self.__dict__.update(state)
+        self._set_columns()
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ columns + + + property + + +

+ + +
+ +

Get the descriptors columns

+
+ +
+ + + + +
+ + + + +

+ __call__(mol, conformer_id=-1) + +

+ + +
+ +

Get the CATS descriptors for a molecule, using topological (2D) distances or, when use_3d_distances is set, 3D distances

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Union[Mol, str] + +
+

the molecule of interest.

+
+
+ required +
conformer_id + int + +
+

Optional conformer id. Only relevant when use_3d_distances +is set to True.

+
+
+ -1 +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
props + ndarray + +
+

array of computed CATS descriptors

+
+
+ +
+ Source code in molfeat/calc/cats.py +
289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
def __call__(self, mol: Union[dm.Mol, str], conformer_id: int = -1):
+    """Get CATS 2D descriptors for a molecule
+
+    Args:
+        mol: the molecule of interest.
+        conformer_id: Optional conformer id. Only relevant when `use_3d_distances`
+            is set to True.
+
+    Returns:
+        props (np.ndarray): list of computed rdkit molecular descriptors
+    """
+
+    mol = dm.to_mol(mol)
+
+    if self.use_3d_distances:
+        if mol.GetNumConformers() < 1:  # type: ignore
+            raise ValueError("Expected a molecule with conformers information.")
+
+        dist_mat = Get3DDistanceMatrix(mol, confId=conformer_id)
+
+    else:
+        dist_mat = GetDistanceMatrix(mol).astype(int)
+
+    out = self._calculate(mol, dist_mat)
+    return to_numpy(out)
+
+
+
+ +
+ + +
+ + + + +

+ __getstate__() + +

+ + +
+ +

Serialize the class for pickling.

+ +
+ Source code in molfeat/calc/cats.py +
320
+321
+322
+323
+324
+325
+326
+327
def __getstate__(self):
+    """Serialize the class for pickling."""
+    state = {}
+    state["max_dist"] = self.max_dist
+    state["bins"] = self.bins
+    state["scale"] = self.scale
+    state["use_3d_distances"] = self.use_3d_distances
+    return state
+
+
+
+ +
+ + +
+ + + + +

+ __init__(max_dist=None, bins=None, scale='raw', use_3d_distances=False, **kwargs) + +

+ + +
+ +

Calculator for the CATS descriptors.

+

max_dist and bins will both determine the length of the fingerprint vector, +which you can get by calling len(calc)

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
max_dist + Union[float, int] + +
+

Maximum distance between pairs. When set to None, the default for 2D is +set to max_dist=8 and for 3D to max_dist=5.

+
+
+ None +
bins + List[int] + +
+

Bins to use. Defaults to equal spacing [0, max_dist[.

+
+
+ None +
scale + str + +
+

How to scale the values. Supported values are: + - 'raw' for the raw values. + - 'num' for values normalized by the number of atoms. + - 'count' for scaling based on occurrence of the PPP.

+
+
+ 'raw' +
use_3d_distances + bool + +
+

Whether to use the 3D distances instead of the topological distances. +If set to True, the input molecules must contain a conformer.

+
+
+ False +
kwargs + +
+

silently ignored extra parameters for compatibility with other calculators.

+
+
+ {} +
+ +
+ Source code in molfeat/calc/cats.py +
 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
def __init__(
+    self,
+    max_dist: Union[float, int] = None,
+    bins: List[int] = None,
+    scale: str = "raw",
+    use_3d_distances: bool = False,
+    **kwargs,
+):
+    """Calculator for the CATS descriptors.
+
+    `max_dist` and `bins` will both determine the length of the fingerprint vector,
+    which you can get by calling `len(calc)`
+
+    Args:
+        max_dist: Maximum distance between pairs. When set to None, the default for 2D is
+            set to `max_dist=8` and for 3D to `max_dist=5`.
+        bins: Bins to use. Defaults to equal spacing `[0, max_dist[`.
+        scale: How to scale the values. Supported values are:
+             - 'raw' for the raw values.
+             - 'num' for values normalized by the number of atoms.
+             - 'count' for scaling based on occurence of the PPP.
+        use_3d_distances: Whether to use the 3D distances instead of the topological distances.
+            If set to True, the input molecules must contain a conformer.
+        kwargs: silently ignored extra parameters for compatibility with other calculators.
+    """
+
+    # Set the max_dist default is set to None
+    if max_dist is None:
+        if use_3d_distances:
+            max_dist = CATS.MAX_DIST_DEFAULT_3D
+        else:
+            max_dist = CATS.MAX_DIST_DEFAULT_2D
+
+    self.max_dist = max_dist
+    self.use_3d_distances = use_3d_distances
+
+    if bins is None:
+        bins = list(np.arange(1, np.floor(self.max_dist + 1), 1))
+
+    # we don't allow interaction that exceed our distance threshold.
+    bins = [x for x in bins if x <= self.max_dist]
+
+    # we start distance indexing at 0
+    if 0 not in bins:
+        bins += [0]
+
+    self.bins = list(sorted(bins))
+
+    self.scale = scale
+
+    self._set_columns()
+
+
+
+ +
+ + +
+ + + + +

+ __len__() + +

+ + +
+ +

Return the length of the calculator

+ +
+ Source code in molfeat/calc/cats.py +
285
+286
+287
def __len__(self):
+    """Return the length of the calculator"""
+    return len(self._columns)
+
+
+
+ +
+ + +
+ + + + +

+ __setstate__(state) + +

+ + +
+ +

Reload the class from pickling.

+ +
+ Source code in molfeat/calc/cats.py +
329
+330
+331
+332
def __setstate__(self, state: dict):
+    """Reload the class from pickling."""
+    self.__dict__.update(state)
+    self._set_columns()
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +

+

Pharmacophore

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ Pharmacophore2D + + +

+ + +
+

+ Bases: SerializableCalculator

+ + +

2D Pharmacophore.

+

The fingerprint is computed using Generate.Gen2DFingerprint from RDKit.

+

An explanation of pharmacophore fingerprints and how the bits are set +is available in the RDKit book. In particular, the following figure describes the process. +Pharmacophore

+ +
+ Source code in molfeat/calc/pharmacophore.py +
 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
class Pharmacophore2D(SerializableCalculator):
+    """2D Pharmacophore.
+
+    The fingerprint is computed using `Generate.Gen2DFingerprint` from RDKit.
+
+    An explanation of pharmacophore fingerprints and how the bits are set
+    is available in the RDKit book. In particular the following figure describes the process.
+    ![Pharmacophore](https://www.rdkit.org/docs/_images/picture_10.jpg){ align=left }
+    """
+
+    def __init__(
+        self,
+        factory: Union[str, MolChemicalFeatureFactory] = "pmapper",
+        length: Optional[int] = 2048,
+        useCounts: bool = None,
+        minPointCount: int = None,
+        maxPointCount: int = None,
+        shortestPathsOnly: bool = None,
+        includeBondOrder: bool = None,
+        skipFeats: List[str] = None,
+        trianglePruneBins: bool = None,
+        bins: List[Tuple[int, int]] = None,
+        **kwargs,
+    ):
+        """Pharmacophore computation.
+
+        Args:
+            factory: Which features factory to use. One of "default", "cats", "gobbi" , "pmapper" or path
+                to a feature definition or a feature factory object
+            length: Optional desired length. If provided, the fp will be refold or padded to that length.
+                If set to None, fallback to the default for the provided sig factory.
+            minPointCount: Minimum number of points.
+            maxPointCount: Maximum number of points.
+            trianglePruneBins: Whether to prune the triangle inequality.
+            includeBondOrder: Whether to consider bond order.
+            shortestPathsOnly: Whether to only use the shortest path between pharmacophores.
+            useCounts: Whether take into account the count information. This will also impact how the folding works.
+            bins: Bins to use.
+        """
+
+        self.factory = factory
+        self.useCounts = useCounts
+        self.minPointCount = minPointCount
+        self.maxPointCount = maxPointCount
+        self.shortestPathsOnly = shortestPathsOnly
+        self.includeBondOrder = includeBondOrder
+        self.skipFeats = skipFeats
+        self.trianglePruneBins = trianglePruneBins
+        self.bins = bins
+
+        self.length = length
+
+        self._init_sig_factory()
+
+    def __call__(self, mol: Union[dm.Mol, str], raw: bool = False):
+        """Compute the Pharmacophore fingeprint for the input molecule.
+
+        Args:
+            mol: the molecule of interest
+            raw: Whether to return the raw fingerprint or a Numpy array.
+
+        Returns:
+            fp: the computed fingerprint as a Numpy array or as a raw object.
+        """
+
+        # Get a molecule
+        mol = dm.to_mol(mol)
+
+        if mol is None:
+            raise ValueError("The input molecule is not valid.")
+
+        # Get distance matrix
+        use_bond_order = self.sig_factory.includeBondOrder
+        d_mat = rdmolops.GetDistanceMatrix(mol, use_bond_order)
+
+        # Generate the fingerprint
+        fp = Generate.Gen2DFingerprint(mol, self.sig_factory, dMat=d_mat)
+
+        # Posprocessing
+        if self.length and self._should_fold:
+            # refold the fingerprint
+            fp = fold_count_fp(fp, dim=self.length, binary=not (self.useCounts or False))
+            if raw:
+                fp = to_fp(fp, bitvect=True)
+
+        if not raw:
+            fp = to_numpy(fp)
+
+        return fp
+
+    def _init_sig_factory(self):
+        """Init the feature factory for this pharmacophore."""
+
+        self.sig_factory = get_sig_factory(
+            self.factory,
+            useCounts=self.useCounts,
+            minPointCount=self.minPointCount,
+            maxPointCount=self.maxPointCount,
+            shortestPathsOnly=self.shortestPathsOnly,
+            includeBondOrder=self.includeBondOrder,
+            skipFeats=self.skipFeats,
+            trianglePruneBins=self.trianglePruneBins,
+            bins=self.bins,
+        )
+
+        # Reinject used params to the class attributes
+        # It might be useful in case the default values are changed
+        # and when serializing the object.
+        self.useCounts = self.sig_factory.useCounts
+        self.minPointCount = self.sig_factory.minPointCount
+        self.maxPointCount = self.sig_factory.maxPointCount
+        self.shortestPathsOnly = self.sig_factory.shortestPathsOnly
+        self.includeBondOrder = self.sig_factory.includeBondOrder
+        self.skipFeats = self.sig_factory.skipFeats
+        self.trianglePruneBins = self.sig_factory.trianglePruneBins
+        self.bins = self.sig_factory.GetBins()
+
+    @property
+    @functools.lru_cache(maxsize=None)
+    def _should_fold(self):
+        return self.sig_factory.GetSigSize() != len(self)
+
+    @property
+    def feature_factory(self):
+        return self.sig_factory.featFactory
+
+    def __len__(self):
+        """Returns the length of the pharmacophore"""
+        return self.length or self.sig_factory.GetSigSize()
+
+    @property
+    def columns(self):
+        """Get the name of all the descriptors of this calculator."""
+
+        if not self.length:
+            return [self.sig_factory.GetBitDescription(x) for x in range(len(self))]
+        else:
+            return [f"Desc:{i}" for i in range(self.length)]
+
+    def __getstate__(self):
+        """Serialize the class for pickling."""
+        state = {}
+        state["factory"] = self.factory
+        state["useCounts"] = self.useCounts
+        state["minPointCount"] = self.minPointCount
+        state["maxPointCount"] = self.maxPointCount
+        state["shortestPathsOnly"] = self.shortestPathsOnly
+        state["includeBondOrder"] = self.includeBondOrder
+        state["skipFeats"] = self.skipFeats
+        state["trianglePruneBins"] = self.trianglePruneBins
+        state["bins"] = self.bins
+        state["length"] = self.length
+        return state
+
+    def __setstate__(self, state: dict):
+        """Reload the class from pickling."""
+        self.__dict__.update(state)
+        self._init_sig_factory()
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ columns + + + property + + +

+ + +
+ +

Get the name of all the descriptors of this calculator.

+
+ +
+ + + + +
+ + + + +

+ __call__(mol, raw=False) + +

+ + +
+ +

Compute the Pharmacophore fingerprint for the input molecule.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Union[Mol, str] + +
+

the molecule of interest

+
+
+ required +
raw + bool + +
+

Whether to return the raw fingerprint or a Numpy array.

+
+
+ False +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
fp + +
+

the computed fingerprint as a Numpy array or as a raw object.

+
+
+ +
+ Source code in molfeat/calc/pharmacophore.py +
 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
def __call__(self, mol: Union[dm.Mol, str], raw: bool = False):
+    """Compute the Pharmacophore fingeprint for the input molecule.
+
+    Args:
+        mol: the molecule of interest
+        raw: Whether to return the raw fingerprint or a Numpy array.
+
+    Returns:
+        fp: the computed fingerprint as a Numpy array or as a raw object.
+    """
+
+    # Get a molecule
+    mol = dm.to_mol(mol)
+
+    if mol is None:
+        raise ValueError("The input molecule is not valid.")
+
+    # Get distance matrix
+    use_bond_order = self.sig_factory.includeBondOrder
+    d_mat = rdmolops.GetDistanceMatrix(mol, use_bond_order)
+
+    # Generate the fingerprint
+    fp = Generate.Gen2DFingerprint(mol, self.sig_factory, dMat=d_mat)
+
+    # Posprocessing
+    if self.length and self._should_fold:
+        # refold the fingerprint
+        fp = fold_count_fp(fp, dim=self.length, binary=not (self.useCounts or False))
+        if raw:
+            fp = to_fp(fp, bitvect=True)
+
+    if not raw:
+        fp = to_numpy(fp)
+
+    return fp
+
+
+
+ +
+ + +
+ + + + +

+ __getstate__() + +

+ + +
+ +

Serialize the class for pickling.

+ +
+ Source code in molfeat/calc/pharmacophore.py +
179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
def __getstate__(self):
+    """Serialize the class for pickling."""
+    state = {}
+    state["factory"] = self.factory
+    state["useCounts"] = self.useCounts
+    state["minPointCount"] = self.minPointCount
+    state["maxPointCount"] = self.maxPointCount
+    state["shortestPathsOnly"] = self.shortestPathsOnly
+    state["includeBondOrder"] = self.includeBondOrder
+    state["skipFeats"] = self.skipFeats
+    state["trianglePruneBins"] = self.trianglePruneBins
+    state["bins"] = self.bins
+    state["length"] = self.length
+    return state
+
+
+
+ +
+ + +
+ + + + +

+ __init__(factory='pmapper', length=2048, useCounts=None, minPointCount=None, maxPointCount=None, shortestPathsOnly=None, includeBondOrder=None, skipFeats=None, trianglePruneBins=None, bins=None, **kwargs) + +

+ + +
+ +

Pharmacophore computation.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
factory + Union[str, MolChemicalFeatureFactory] + +
+

Which features factory to use. One of "default", "cats", "gobbi" , "pmapper" or path +to a feature definition or a feature factory object

+
+
+ 'pmapper' +
length + Optional[int] + +
+

Optional desired length. If provided, the fp will be refolded or padded to that length. +If set to None, fall back to the default for the provided sig factory.

+
+
+ 2048 +
minPointCount + int + +
+

Minimum number of points.

+
+
+ None +
maxPointCount + int + +
+

Maximum number of points.

+
+
+ None +
trianglePruneBins + bool + +
+

Whether to prune the triangle inequality.

+
+
+ None +
includeBondOrder + bool + +
+

Whether to consider bond order.

+
+
+ None +
shortestPathsOnly + bool + +
+

Whether to only use the shortest path between pharmacophores.

+
+
+ None +
useCounts + bool + +
+

Whether to take into account the count information. This will also impact how the folding works.

+
+
+ None +
bins + List[Tuple[int, int]] + +
+

Bins to use.

+
+
+ None +
+ +
+ Source code in molfeat/calc/pharmacophore.py +
50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
+88
+89
+90
+91
+92
def __init__(
+    self,
+    factory: Union[str, MolChemicalFeatureFactory] = "pmapper",
+    length: Optional[int] = 2048,
+    useCounts: bool = None,
+    minPointCount: int = None,
+    maxPointCount: int = None,
+    shortestPathsOnly: bool = None,
+    includeBondOrder: bool = None,
+    skipFeats: List[str] = None,
+    trianglePruneBins: bool = None,
+    bins: List[Tuple[int, int]] = None,
+    **kwargs,
+):
+    """Pharmacophore computation.
+
+    Args:
+        factory: Which features factory to use. One of "default", "cats", "gobbi" , "pmapper" or path
+            to a feature definition or a feature factory object
+        length: Optional desired length. If provided, the fp will be refold or padded to that length.
+            If set to None, fallback to the default for the provided sig factory.
+        minPointCount: Minimum number of points.
+        maxPointCount: Maximum number of points.
+        trianglePruneBins: Whether to prune the triangle inequality.
+        includeBondOrder: Whether to consider bond order.
+        shortestPathsOnly: Whether to only use the shortest path between pharmacophores.
+        useCounts: Whether take into account the count information. This will also impact how the folding works.
+        bins: Bins to use.
+    """
+
+    self.factory = factory
+    self.useCounts = useCounts
+    self.minPointCount = minPointCount
+    self.maxPointCount = maxPointCount
+    self.shortestPathsOnly = shortestPathsOnly
+    self.includeBondOrder = includeBondOrder
+    self.skipFeats = skipFeats
+    self.trianglePruneBins = trianglePruneBins
+    self.bins = bins
+
+    self.length = length
+
+    self._init_sig_factory()
+
+
+
+ +
+ + +
+ + + + +

+ __len__() + +

+ + +
+ +

Returns the length of the pharmacophore

+ +
+ Source code in molfeat/calc/pharmacophore.py +
166
+167
+168
def __len__(self):
+    """Returns the length of the pharmacophore"""
+    return self.length or self.sig_factory.GetSigSize()
+
+
+
+ +
+ + +
+ + + + +

+ __setstate__(state) + +

+ + +
+ +

Reload the class from pickling.

+ +
+ Source code in molfeat/calc/pharmacophore.py +
194
+195
+196
+197
def __setstate__(self, state: dict):
+    """Reload the class from pickling."""
+    self.__dict__.update(state)
+    self._init_sig_factory()
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ Pharmacophore3D + + +

+ + +
+

+ Bases: SerializableCalculator

+ + +

3D Pharmacophore.

+

The fingerprint is computed using pmapper.

+

This featurizer supports building a consensus pharmacophore from a set of molecules.

+ +
+ Source code in molfeat/calc/pharmacophore.py +
200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
+449
+450
+451
+452
+453
+454
+455
+456
+457
+458
+459
+460
+461
+462
+463
+464
+465
+466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
+480
+481
+482
+483
+484
+485
+486
+487
+488
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498
+499
+500
+501
+502
+503
+504
+505
+506
+507
+508
+509
+510
+511
+512
+513
+514
+515
+516
+517
+518
+519
+520
+521
+522
+523
+524
+525
+526
+527
+528
+529
+530
+531
+532
+533
+534
+535
+536
+537
+538
+539
+540
+541
+542
+543
+544
+545
+546
+547
+548
+549
+550
+551
+552
+553
+554
+555
+556
+557
+558
+559
+560
+561
+562
+563
+564
+565
+566
+567
+568
+569
+570
+571
+572
+573
+574
+575
+576
+577
+578
+579
+580
+581
+582
+583
+584
+585
+586
+587
+588
+589
+590
+591
+592
+593
+594
+595
+596
+597
+598
+599
+600
+601
+602
+603
+604
+605
+606
+607
+608
+609
+610
+611
+612
+613
+614
+615
+616
+617
+618
+619
+620
+621
+622
+623
+624
class Pharmacophore3D(SerializableCalculator):
+    """3D Pharmacophore.
+
+    The fingerprint is computed using [`pmapper`](https://github.com/DrrDom/pmapper).
+
+    This featurizer supports building a consensus pharmacophore from a set of molecules.
+    """
+
+    def __init__(
+        self,
+        factory: Union[str, MolChemicalFeatureFactory] = "pmapper",
+        length: int = 2048,
+        bin_step: float = 1,
+        min_features: int = 2,
+        max_features: int = 3,
+        use_modulo: bool = True,
+        tolerance: float = 0,
+    ):
+        """Pharmacophore computation.
+
+        Args:
+            factory: Which features factory to use. One of "default", "cats", "gobbi" , "pmapper" or path
+                to a feature definition or a feature factory object
+            length: Optional desired length. If provided, the fp will be refold or padded to that length.
+                If set to None, fallback to the default for the provided sig factory.
+            bin_step: Bin step to use.
+            min_features: Minimum number of features to use.
+            max_features: Maximum number of features to use.
+            use_modulo: whether to use modulo to compute the pharmacophore fingerprint
+            tolerance: tolerance value to use when computing the pharmacophore fingerprint
+        """
+
+        self.factory = factory
+        self.length = length
+        self.bin_step = bin_step
+        self.min_features = min_features
+        self.max_features = max_features
+        self.use_modulo = use_modulo
+        self.tolerance = tolerance
+
+        self._init_feature_factory()
+
+    def __call__(self, mol: Union[dm.Mol, str], conformer_id: int = -1, raw: bool = False):
+        """Compute the Pharmacophore fingeprint for the input molecule.
+
+        Args:
+            mol: the molecule of interest
+            conformer_id: the conformer id to use.
+            raw: Whether to return the raw fingerprint or a Numpy array.
+
+        Returns:
+            fp: the computed fingerprint as a Numpy array.
+        """
+
+        # Get a molecule
+        mol = dm.to_mol(mol)
+
+        if mol is None:
+            raise ValueError("The input molecule is not valid.")
+
+        if mol.GetNumConformers() < 1:  # type: ignore
+            raise ValueError("Expected a molecule with conformers information.")
+
+        # Get the features for the mol
+        features = self.get_features(mol, conformer_id=conformer_id)
+
+        # Convert features dataframe to coordinates
+        if features.empty:
+            features_coords = []
+        else:
+            features_coords = features[["feature_name", "coords"]].values.tolist()
+
+        # Compute the fingerprint
+        fp = self.compute_fp_from_coords(features_coords, raw=raw)
+
+        return fp
+
+    def consensus_fp(
+        self,
+        mols: List[dm.Mol],
+        align: bool = True,
+        conformer_id: int = -1,
+        copy: bool = True,
+        min_samples_ratio: float = 0.5,
+        eps: float = 2,
+        raw: bool = False,
+        **cluster_kwargs,
+    ):
+        """Compute a consensus fingerprint from a list of molecules.
+
+        Args:
+            mols: a list of molecules.
+            align: Whether to align the conformers of the molecules.
+            conformer_id: Optional conformer id.
+            copy: Whether to copy the molecules before clustering.
+            min_samples_ratio: Percentages of mols that must contain a pharmacophoric point
+                to be considered as a core point.
+            eps: The maximum distance between two samples for one to be considered as
+                in the neighborhood of the other.
+            raw: Whether to return the raw fingerprint or a Numpy array.
+            cluster_kwargs: additional keyword arguments for the clustering algorithm.
+        """
+
+        # Get all the features
+        features = self.get_features_from_many(
+            mols,
+            keep_mols=True,
+            align=align,
+            conformer_id=conformer_id,
+            copy=copy,
+        )
+
+        # Retrieve the aligned molecules
+        mols = features.groupby("mol_index").first()["mol"].tolist()
+        # Cluster the features
+        clustered_features = self.cluster_features(
+            features, min_samples_ratio=min_samples_ratio, eps=eps, **cluster_kwargs
+        )
+        # Convert features dataframe to coordinates
+        if clustered_features.empty:
+            features_coords = []
+        else:
+            features_coords = clustered_features[["feature_name", "coords"]].values.tolist()
+        # Compute the fingerprint
+        fp = self.compute_fp_from_coords(features_coords, raw=raw)
+
+        return fp
+
+    def _init_feature_factory(self):
+        """Init the feature factory."""
+        self.feature_factory = get_feature_factory(self.factory)
+
+    def get_features(self, mol: dm.Mol, conformer_id: int = -1) -> pd.DataFrame:
+        """Retrieve the features for a given molecule.
+
+        Args:
+            mol: the molecule of interest
+
+        Returns:
+            features: the features as a Numpy array
+        """
+        features_data = []
+
+        # Extract the features for this molecule
+        features = self.feature_factory.GetFeaturesForMol(mol, confId=conformer_id)
+
+        # Extract all the feature atom indices for this molecule
+        for feature in features:
+            datum = {}
+            datum["feature_id"] = feature.GetId()
+            datum["feature_name"] = feature.GetFamily()
+            datum["feature_type"] = feature.GetType()
+            datum["atom_indices"] = feature.GetAtomIds()
+            datum["coords"] = np.array(feature.GetPos())
+
+            features_data.append(datum)
+
+        features_data = pd.DataFrame(features_data)
+
+        return features_data
+
+    def get_features_from_many(
+        self,
+        mols: List[dm.Mol],
+        align: bool = True,
+        conformer_id: int = -1,
+        copy: bool = True,
+        keep_mols: bool = False,
+    ):
+        """Extract all the features from a list of molecules after an optional
+        alignement step.
+
+        Args:
+            mols: List of molecules with conformers.
+            align: Whether to align the conformers of the molecules.
+            conformer_id: Optional conformer id.
+            copy: Whether to copy the molecules before clustering.
+            keep_mols: Whether to keep the molecules in the returned dataframe.
+        """
+
+        if not all([mol.GetNumConformers() >= 1 for mol in mols]):
+            raise ValueError("One or more input molecules is missing a conformer.")
+
+        # Make a copy of the molecules since they are going to be modified
+        if copy:
+            mols = [dm.copy_mol(mol) for mol in mols]
+
+        # Align the conformers
+        if align:
+            mols, _ = commons.align_conformers(mols, copy=False, conformer_id=conformer_id)
+
+        all_features = pd.DataFrame()
+
+        for i, mol in enumerate(mols):
+            features = self.get_features(mol)
+            features["mol_index"] = i
+
+            if keep_mols:
+                features["mol"] = mol
+
+            all_features = pd.concat([all_features, features], ignore_index=True)
+
+        return all_features
+
+    def compute_fp_from_coords(
+        self,
+        features_coords: List[Tuple[str, Tuple[float]]],
+        raw: bool = False,
+    ):
+        """Compute a fingerprint from a list of features.
+
+        Args:
+            features_coords: Features coords: `[('A', (1.23, 2.34, 3.45)), ('A', (4.56, 5.67, 6.78)), ...]`.
+            raw: Whether to return the raw fingerprint or a Numpy array.
+        """
+
+        # Init the pmapper engine
+        ph_engine = Pharm(bin_step=self.bin_step)
+        # Convert coords to list in case those are arrays
+        features_coords = [(name, tuple(coords)) for name, coords in features_coords]
+        # Load pharmacophore points
+        ph_engine.load_from_feature_coords(features_coords)
+        # Init the iterator over the pharmacophore points
+        points_iterator = ph_engine.iterate_pharm(
+            min_features=self.min_features,
+            max_features=self.max_features,
+            tol=self.tolerance,
+            return_feature_ids=False,
+        )
+
+        # Compute the fingerprint
+        on_bits = set()
+        for h in points_iterator:
+            if self.use_modulo:
+                on_bits.add(int(h, 16) % self.length)  # type: ignore
+            else:
+                random.seed(int(h, 16))  # type: ignore
+                on_bits.add(random.randrange(self.length))
+
+        if raw:
+            return np.array(on_bits)
+
+        fp = np.zeros(self.length, dtype=int)
+        fp[list(on_bits)] = 1
+
+        return fp
+
+    def cluster_features(
+        self,
+        features: pd.DataFrame,
+        min_samples_ratio: float = 0.5,
+        n_mols: int = None,
+        eps: float = np.inf,
+        **kwargs,
+    ):
+        """Cluster a set of pharmacophoric features using OPTICS.
+        The only reason why we are not using SpectralClustering is because of the need to provide
+        the number of clusters.
+
+        Args:
+            features: A dataframe of features.
+            min_samples_ratio: Percentages of mols that must contain a pharmacophoric point
+                to be considered as a core point.
+            n_mols: Optional number of compounds to compute `min_samples` from the
+                `min_samples_ratio` value. If not set it will use `mol_index` from
+                the `features` dataframe.
+            eps: The maximum distance between two samples for one to be considered as
+                in the neighborhood of the other. This is max_eps in OPTICS
+            kwargs: Any additional parameters to pass to `sklearn.cluster.OPTICS`.
+        """
+
+        if n_mols is None:
+            n_mols = len(features["mol_index"].unique())
+
+        # Compute min_samples
+        min_samples = max(int(round(min_samples_ratio * n_mols, 0)), 1)
+        clusters = []
+        feature_id = 0
+        for _, rows in features.groupby("feature_name"):
+            feature_name = rows.iloc[0]["feature_name"]
+            if min_samples > rows.shape[0]:
+                logger.info(
+                    f"Feature {feature_name} does not have enough molecule ({len(rows)}), skipping"
+                )
+                continue
+            coords = np.vstack(rows["coords"].values)
+
+            # Init clustering
+            optics = OPTICS(min_samples=min_samples, max_eps=eps, **kwargs)
+            optics = optics.fit(coords)
+            labels = optics.labels_
+            # a node that is not a core would basically be a node that cannot be labeled
+            # thus border nodes are considered core
+            core_samples_mask = np.zeros_like(labels, dtype=bool)
+            core_samples_mask[labels == 1] = True
+
+            # Find the centroids (consensus points)
+            unique_labels = set(labels)
+            for k in unique_labels:
+                if k == -1:
+                    continue
+                class_member_mask = labels == k
+                cluster_coords = coords[class_member_mask & core_samples_mask]
+                if len(cluster_coords) == 0:
+                    continue
+                cluster_centroid = cluster_coords.mean(axis=0)
+
+                cluster = {}
+                cluster["feature_id"] = feature_id
+                cluster["feature_name"] = feature_name
+                cluster["coords"] = cluster_centroid
+                cluster["cluster_size"] = len(cluster_coords)
+
+                clusters.append(cluster)
+                feature_id += 1
+
+        clusters = pd.DataFrame(clusters)
+
+        return clusters
+
+    ## Viz methods
+
+    def show(
+        self,
+        mol: dm.Mol,
+        features: pd.DataFrame = None,
+        alpha: float = 1.0,
+        sphere_radius: float = 0.4,
+        show_legend: bool = True,
+    ):
+        """Show a 3D view of a given molecule with the pharmacophoric features.
+
+        Args:
+            mol: the molecule of interest
+            alpha: Alpha value for the colors (currently not working).
+            sphere_radius: Radius of the spheres for the features.
+            show_legend: Display the legend (the layout is bad but at least it
+                shows the legend).
+        """
+
+        if features is None:
+            features = self.get_features(mol)
+
+        return viz.show_pharm_features(
+            mol,
+            features=features,
+            feature_factory=self.feature_factory,
+            alpha=alpha,
+            sphere_radius=sphere_radius,
+            show_legend=show_legend,
+        )
+
+    def show_many(
+        self,
+        mols: List[dm.Mol],
+        align: bool = True,
+        conformer_id: int = -1,
+        copy: bool = True,
+        min_samples_ratio: float = 0.5,
+        eps: float = 2,
+        alpha: float = 1.0,
+        sphere_radius: float = 0.4,
+        show_legend: bool = True,
+    ):
+        """Show a 3D view of a given molecule with the pharmacophoric features.
+
+        Args:
+            mols: a list of molecules.
+            align: Whether to align the conformers of the molecules.
+            conformer_id: Optional conformer id.
+            copy: Whether to copy the molecules before clustering.
+            min_samples_ratio: Percentages of mols that must contain a pharmacophoric point
+                to be considered as a core point.
+            eps: The maximum distance between two samples for one to be considered as
+                in the neighborhood of the other.
+            alpha: Alpha value for the colors (currently not working).
+            sphere_radius: Radius of the spheres for the features.
+            show_legend: Display the legend (the layout is bad but at least it
+                shows the legend).
+        """
+
+        # Get all the features
+        features = self.get_features_from_many(
+            mols,
+            keep_mols=True,
+            align=align,
+            conformer_id=conformer_id,
+            copy=copy,
+        )
+
+        # Retrieve the aligned molecules
+        mols = features.groupby("mol_index").first()["mol"].tolist()
+
+        # Cluster the features
+        clustered_features = self.cluster_features(
+            features,
+            min_samples_ratio=min_samples_ratio,
+            eps=eps,
+        )
+
+        return viz.show_pharm_features(
+            mols,
+            features=clustered_features,
+            feature_factory=self.feature_factory,
+            alpha=alpha,
+            sphere_radius=sphere_radius,
+            show_legend=show_legend,
+        )
+
+    def __getstate__(self):
+        """Serialize the class for pickling."""
+        state = {}
+        state["factory"] = self.factory
+        state["length"] = self.length
+        state["bin_step"] = self.bin_step
+        state["min_features"] = self.min_features
+        state["max_features"] = self.max_features
+        state["use_modulo"] = self.use_modulo
+        state["tolerance"] = self.tolerance
+        return state
+
+    def __setstate__(self, state: dict):
+        """Reload the class from pickling."""
+        self.__dict__.update(state)
+        self._init_feature_factory()
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __call__(mol, conformer_id=-1, raw=False) + +

+ + +
+ +

Compute the Pharmacophore fingerprint for the input molecule.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Union[Mol, str] + +
+

the molecule of interest

+
+
+ required +
conformer_id + int + +
+

the conformer id to use.

+
+
+ -1 +
raw + bool + +
+

Whether to return the raw fingerprint or a Numpy array.

+
+
+ False +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
fp + +
+

the computed fingerprint as a Numpy array.

+
+
+ +
+ Source code in molfeat/calc/pharmacophore.py +
242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
def __call__(self, mol: Union[dm.Mol, str], conformer_id: int = -1, raw: bool = False):
+    """Compute the Pharmacophore fingeprint for the input molecule.
+
+    Args:
+        mol: the molecule of interest
+        conformer_id: the conformer id to use.
+        raw: Whether to return the raw fingerprint or a Numpy array.
+
+    Returns:
+        fp: the computed fingerprint as a Numpy array.
+    """
+
+    # Get a molecule
+    mol = dm.to_mol(mol)
+
+    if mol is None:
+        raise ValueError("The input molecule is not valid.")
+
+    if mol.GetNumConformers() < 1:  # type: ignore
+        raise ValueError("Expected a molecule with conformers information.")
+
+    # Get the features for the mol
+    features = self.get_features(mol, conformer_id=conformer_id)
+
+    # Convert features dataframe to coordinates
+    if features.empty:
+        features_coords = []
+    else:
+        features_coords = features[["feature_name", "coords"]].values.tolist()
+
+    # Compute the fingerprint
+    fp = self.compute_fp_from_coords(features_coords, raw=raw)
+
+    return fp
+
+
+
+ +
+ + +
+ + + + +

+ __getstate__() + +

+ + +
+ +

Serialize the class for pickling.

+ +
+ Source code in molfeat/calc/pharmacophore.py +
609
+610
+611
+612
+613
+614
+615
+616
+617
+618
+619
def __getstate__(self):
+    """Serialize the class for pickling."""
+    state = {}
+    state["factory"] = self.factory
+    state["length"] = self.length
+    state["bin_step"] = self.bin_step
+    state["min_features"] = self.min_features
+    state["max_features"] = self.max_features
+    state["use_modulo"] = self.use_modulo
+    state["tolerance"] = self.tolerance
+    return state
+
+
+
+ +
+ + +
+ + + + +

+ __init__(factory='pmapper', length=2048, bin_step=1, min_features=2, max_features=3, use_modulo=True, tolerance=0) + +

+ + +
+ +

Pharmacophore computation.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
factory + Union[str, MolChemicalFeatureFactory] + +
+

Which features factory to use. One of "default", "cats", "gobbi" , "pmapper" or path +to a feature definition or a feature factory object

+
+
+ 'pmapper' +
length + int + +
+

Optional desired length. If provided, the fp will be refold or padded to that length. +If set to None, fallback to the default for the provided sig factory.

+
+
+ 2048 +
bin_step + float + +
+

Bin step to use.

+
+
+ 1 +
min_features + int + +
+

Minimum number of features to use.

+
+
+ 2 +
max_features + int + +
+

Maximum number of features to use.

+
+
+ 3 +
use_modulo + bool + +
+

whether to use modulo to compute the pharmacophore fingerprint

+
+
+ True +
tolerance + float + +
+

tolerance value to use when computing the pharmacophore fingerprint

+
+
+ 0 +
+ +
+ Source code in molfeat/calc/pharmacophore.py +
208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
def __init__(
+    self,
+    factory: Union[str, MolChemicalFeatureFactory] = "pmapper",
+    length: int = 2048,
+    bin_step: float = 1,
+    min_features: int = 2,
+    max_features: int = 3,
+    use_modulo: bool = True,
+    tolerance: float = 0,
+):
+    """Pharmacophore computation.
+
+    Args:
+        factory: Which features factory to use. One of "default", "cats", "gobbi" , "pmapper" or path
+            to a feature definition or a feature factory object
+        length: Optional desired length. If provided, the fp will be refold or padded to that length.
+            If set to None, fallback to the default for the provided sig factory.
+        bin_step: Bin step to use.
+        min_features: Minimum number of features to use.
+        max_features: Maximum number of features to use.
+        use_modulo: whether to use modulo to compute the pharmacophore fingerprint
+        tolerance: tolerance value to use when computing the pharmacophore fingerprint
+    """
+
+    self.factory = factory
+    self.length = length
+    self.bin_step = bin_step
+    self.min_features = min_features
+    self.max_features = max_features
+    self.use_modulo = use_modulo
+    self.tolerance = tolerance
+
+    self._init_feature_factory()
+
+
+
+ +
+ + +
+ + + + +

+ __setstate__(state) + +

+ + +
+ +

Reload the class from pickling.

+ +
+ Source code in molfeat/calc/pharmacophore.py +
621
+622
+623
+624
def __setstate__(self, state: dict):
+    """Reload the class from pickling."""
+    self.__dict__.update(state)
+    self._init_feature_factory()
+
+
+
+ +
+ + +
+ + + + +

+ cluster_features(features, min_samples_ratio=0.5, n_mols=None, eps=np.inf, **kwargs) + +

+ + +
+ +

Cluster a set of pharmacophoric features using OPTICS. +The only reason why we are not using SpectralClustering is because of the need to provide +the number of clusters.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
features + DataFrame + +
+

A dataframe of features.

+
+
+ required +
min_samples_ratio + float + +
+

Percentages of mols that must contain a pharmacophoric point +to be considered as a core point.

+
+
+ 0.5 +
n_mols + int + +
+

Optional number of compounds to compute min_samples from the +min_samples_ratio value. If not set it will use mol_index from +the features dataframe.

+
+
+ None +
eps + float + +
+

The maximum distance between two samples for one to be considered as +in the neighborhood of the other. This is max_eps in OPTICS

+
+
+ inf +
kwargs + +
+

Any additional parameters to pass to sklearn.cluster.OPTICS.

+
+
+ {} +
+ +
+ Source code in molfeat/calc/pharmacophore.py +
447
+448
+449
+450
+451
+452
+453
+454
+455
+456
+457
+458
+459
+460
+461
+462
+463
+464
+465
+466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
+480
+481
+482
+483
+484
+485
+486
+487
+488
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498
+499
+500
+501
+502
+503
+504
+505
+506
+507
+508
+509
+510
+511
+512
+513
+514
+515
+516
+517
+518
def cluster_features(
    self,
    features: pd.DataFrame,
    min_samples_ratio: float = 0.5,
    n_mols: int = None,
    eps: float = np.inf,
    **kwargs,
):
    """Cluster a set of pharmacophoric features using OPTICS.

    OPTICS is used instead of SpectralClustering because it does not require
    the number of clusters to be provided.

    Each feature family (``feature_name``) is clustered independently and every
    cluster found is summarized by the centroid of its members.

    Args:
        features: A dataframe of features. It must contain the `feature_name`
            and `coords` columns, and `mol_index` when `n_mols` is not set.
        min_samples_ratio: Percentages of mols that must contain a pharmacophoric point
            to be considered as a core point.
        n_mols: Optional number of compounds to compute `min_samples` from the
            `min_samples_ratio` value. If not set it will use `mol_index` from
            the `features` dataframe.
        eps: The maximum distance between two samples for one to be considered as
            in the neighborhood of the other. This is `max_eps` in OPTICS.
        kwargs: Any additional parameters to pass to `sklearn.cluster.OPTICS`.

    Returns:
        A dataframe with one row per cluster and the columns `feature_id`,
        `feature_name`, `coords` (cluster centroid) and `cluster_size`.
        The dataframe is empty when no cluster is found.
    """

    # Nothing to cluster: return the same empty result as "no cluster found".
    if features.empty:
        return pd.DataFrame()

    if n_mols is None:
        n_mols = len(features["mol_index"].unique())

    # Minimum number of points required to form a cluster (at least one).
    min_samples = max(int(round(min_samples_ratio * n_mols, 0)), 1)

    clusters = []
    for feature_name, rows in features.groupby("feature_name"):
        if min_samples > len(rows):
            logger.info(
                f"Feature {feature_name} does not have enough molecule ({len(rows)}), skipping"
            )
            continue
        coords = np.vstack(rows["coords"].values)

        # Run the clustering for this feature family
        optics = OPTICS(min_samples=min_samples, max_eps=eps, **kwargs)
        labels = optics.fit(coords).labels_

        # A sample that could not be assigned to any cluster is labelled -1 (noise).
        # Every labelled sample (border samples included) counts as a member of its
        # cluster. The previous mask `labels == 1` only kept the cluster whose label
        # happened to be 1 and silently discarded all the others.
        for k in np.unique(labels):
            if k == -1:
                continue
            cluster_coords = coords[labels == k]

            # The centroid of the members is the consensus point of the cluster
            clusters.append(
                {
                    "feature_id": len(clusters),
                    "feature_name": feature_name,
                    "coords": cluster_coords.mean(axis=0),
                    "cluster_size": len(cluster_coords),
                }
            )

    return pd.DataFrame(clusters)
+
+
+
+ +
+ + +
+ + + + +

+ compute_fp_from_coords(features_coords, raw=False) + +

+ + +
+ +

Compute a fingerprint from a list of features.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
features_coords + List[Tuple[str, Tuple[float]]] + +
+

Features coords: [('A', (1.23, 2.34, 3.45)), ('A', (4.56, 5.67, 6.78)), ...].

+
+
+ required +
raw + bool + +
+

Whether to return the raw fingerprint or a Numpy array.

+
+
+ False +
+ +
+ Source code in molfeat/calc/pharmacophore.py +
404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
def compute_fp_from_coords(
    self,
    features_coords: List[Tuple[str, Tuple[float]]],
    raw: bool = False,
):
    """Compute a fingerprint from a list of features.

    Every pharmacophore hash produced by the pmapper engine is folded into a bit
    position in `[0, self.length)`, either with a modulo (`self.use_modulo`) or by
    drawing a position from a random generator seeded with the hash.

    Args:
        features_coords: Features coords: `[('A', (1.23, 2.34, 3.45)), ('A', (4.56, 5.67, 6.78)), ...]`.
        raw: Whether to return the raw fingerprint or a Numpy array.

    Returns:
        If `raw` is True, a 1D integer array with the sorted indices of the bits
        that are set. Otherwise a binary array of size `self.length`.
    """

    # Init the pmapper engine
    ph_engine = Pharm(bin_step=self.bin_step)
    # Convert coords to tuples in case those are arrays
    features_coords = [(name, tuple(coords)) for name, coords in features_coords]
    # Load pharmacophore points
    ph_engine.load_from_feature_coords(features_coords)
    # Init the iterator over the pharmacophore points
    points_iterator = ph_engine.iterate_pharm(
        min_features=self.min_features,
        max_features=self.max_features,
        tol=self.tolerance,
        return_feature_ids=False,
    )

    # A private generator yields the same bits as seeding the module-level one,
    # without resetting the global random state of the caller.
    rng = random.Random()

    on_bits = set()
    for h in points_iterator:
        hash_value = int(h, 16)  # type: ignore
        if self.use_modulo:
            on_bits.add(hash_value % self.length)
        else:
            rng.seed(hash_value)
            on_bits.add(rng.randrange(self.length))

    if raw:
        # `np.array(set)` would build a 0-d object array wrapping the set itself,
        # so the set is converted to a sorted sequence first.
        return np.array(sorted(on_bits), dtype=int)

    fp = np.zeros(self.length, dtype=int)
    fp[list(on_bits)] = 1

    return fp
+
+
+
+ +
+ + +
+ + + + +

+ consensus_fp(mols, align=True, conformer_id=-1, copy=True, min_samples_ratio=0.5, eps=2, raw=False, **cluster_kwargs) + +

+ + +
+ +

Compute a consensus fingerprint from a list of molecules.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + List[Mol] + +
+

a list of molecules.

+
+
+ required +
align + bool + +
+

Whether to align the conformers of the molecules.

+
+
+ True +
conformer_id + int + +
+

Optional conformer id.

+
+
+ -1 +
copy + bool + +
+

Whether to copy the molecules before clustering.

+
+
+ True +
min_samples_ratio + float + +
+

Percentages of mols that must contain a pharmacophoric point +to be considered as a core point.

+
+
+ 0.5 +
eps + float + +
+

The maximum distance between two samples for one to be considered as +in the neighborhood of the other.

+
+
+ 2 +
raw + bool + +
+

Whether to return the raw fingerprint or a Numpy array.

+
+
+ False +
cluster_kwargs + +
+

additional keyword arguments for the clustering algorithm.

+
+
+ {} +
+ +
+ Source code in molfeat/calc/pharmacophore.py +
277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
def consensus_fp(
    self,
    mols: List[dm.Mol],
    align: bool = True,
    conformer_id: int = -1,
    copy: bool = True,
    min_samples_ratio: float = 0.5,
    eps: float = 2,
    raw: bool = False,
    **cluster_kwargs,
):
    """Compute a consensus fingerprint from a list of molecules.

    The features of all the molecules are extracted (after an optional alignment),
    clustered with `cluster_features`, and the cluster centroids are turned into a
    fingerprint with `compute_fp_from_coords`.

    Args:
        mols: a list of molecules.
        align: Whether to align the conformers of the molecules.
        conformer_id: Optional conformer id.
        copy: Whether to copy the molecules before clustering.
        min_samples_ratio: Percentages of mols that must contain a pharmacophoric point
            to be considered as a core point.
        eps: The maximum distance between two samples for one to be considered as
            in the neighborhood of the other.
        raw: Whether to return the raw fingerprint or a Numpy array.
        cluster_kwargs: additional keyword arguments for the clustering algorithm.

    Returns:
        The fingerprint returned by `compute_fp_from_coords`.
    """

    # Get all the features. The molecules themselves are not needed for the
    # fingerprint, so they are not stored in the dataframe.
    features = self.get_features_from_many(
        mols,
        keep_mols=False,
        align=align,
        conformer_id=conformer_id,
        copy=copy,
    )

    # Cluster the features
    clustered_features = self.cluster_features(
        features, min_samples_ratio=min_samples_ratio, eps=eps, **cluster_kwargs
    )

    # Convert the clusters to a list of (feature_name, coords) pairs
    if clustered_features.empty:
        features_coords = []
    else:
        features_coords = clustered_features[["feature_name", "coords"]].values.tolist()

    # Compute the fingerprint
    return self.compute_fp_from_coords(features_coords, raw=raw)
+
+
+
+ +
+ + +
+ + + + +

+ get_features(mol, conformer_id=-1) + +

+ + +
+ +

Retrieve the features for a given molecule.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Mol + +
+

the molecule of interest

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
features + DataFrame + +
+

the features as a pandas DataFrame (one row per feature)

+
+
+ +
+ Source code in molfeat/calc/pharmacophore.py +
332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
def get_features(self, mol: dm.Mol, conformer_id: int = -1) -> pd.DataFrame:
    """Retrieve the pharmacophoric features of a single molecule.

    Args:
        mol: the molecule of interest
        conformer_id: id of the conformer, forwarded as `confId` to the
            feature factory.

    Returns:
        features: a dataframe with one row per feature and the columns
            `feature_id`, `feature_name`, `feature_type`, `atom_indices`
            and `coords`.
    """
    # Ask the feature factory for every feature found on this molecule
    mol_features = self.feature_factory.GetFeaturesForMol(mol, confId=conformer_id)

    # One record per feature: identity, family/type, atoms involved and position
    records = [
        {
            "feature_id": feat.GetId(),
            "feature_name": feat.GetFamily(),
            "feature_type": feat.GetType(),
            "atom_indices": feat.GetAtomIds(),
            "coords": np.array(feat.GetPos()),
        }
        for feat in mol_features
    ]

    return pd.DataFrame(records)
+
+
+
+ +
+ + +
+ + + + +

+ get_features_from_many(mols, align=True, conformer_id=-1, copy=True, keep_mols=False) + +

+ + +
+ +

Extract all the features from a list of molecules after an optional +alignment step.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + List[Mol] + +
+

List of molecules with conformers.

+
+
+ required +
align + bool + +
+

Whether to align the conformers of the molecules.

+
+
+ True +
conformer_id + int + +
+

Optional conformer id.

+
+
+ -1 +
copy + bool + +
+

Whether to copy the molecules before clustering.

+
+
+ True +
keep_mols + bool + +
+

Whether to keep the molecules in the returned dataframe.

+
+
+ False +
+ +
+ Source code in molfeat/calc/pharmacophore.py +
361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
+402
def get_features_from_many(
    self,
    mols: List[dm.Mol],
    align: bool = True,
    conformer_id: int = -1,
    copy: bool = True,
    keep_mols: bool = False,
):
    """Extract all the features from a list of molecules after an optional
    alignment step.

    Args:
        mols: List of molecules with conformers.
        align: Whether to align the conformers of the molecules.
        conformer_id: Optional conformer id. It is used both for the alignment
            and for the feature extraction.
        copy: Whether to copy the molecules before clustering.
        keep_mols: Whether to keep the molecules in the returned dataframe.

    Returns:
        A dataframe with the features of all the molecules. The `mol_index`
        column holds the position of the source molecule in `mols`, and the `mol`
        column holds the molecule itself when `keep_mols` is True.

    Raises:
        ValueError: if at least one molecule has no conformer.
    """

    if not all(mol.GetNumConformers() >= 1 for mol in mols):
        raise ValueError("One or more input molecules is missing a conformer.")

    # Make a copy of the molecules since they are going to be modified
    if copy:
        mols = [dm.copy_mol(mol) for mol in mols]

    # Align the conformers
    if align:
        mols, _ = commons.align_conformers(mols, copy=False, conformer_id=conformer_id)

    # Collect one dataframe per molecule and concatenate once at the end, instead
    # of re-copying the accumulated dataframe at every iteration.
    per_mol_features = []
    for i, mol in enumerate(mols):
        # Forward the conformer id so that the requested conformer is the one used.
        features = self.get_features(mol, conformer_id=conformer_id)
        features["mol_index"] = i

        if keep_mols:
            features["mol"] = mol

        per_mol_features.append(features)

    # `pd.concat` refuses an empty list
    if not per_mol_features:
        return pd.DataFrame()

    return pd.concat(per_mol_features, ignore_index=True)
+
+
+
+ +
+ + +
+ + + + +

+ show(mol, features=None, alpha=1.0, sphere_radius=0.4, show_legend=True) + +

+ + +
+ +

Show a 3D view of a given molecule with the pharmacophoric features.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Mol + +
+

the molecule of interest

+
+
+ required +
alpha + float + +
+

Alpha value for the colors (currently not working).

+
+
+ 1.0 +
sphere_radius + float + +
+

Radius of the spheres for the features.

+
+
+ 0.4 +
show_legend + bool + +
+

Display the legend (the layout is bad but at least it +shows the legend).

+
+
+ True +
+ +
+ Source code in molfeat/calc/pharmacophore.py +
522
+523
+524
+525
+526
+527
+528
+529
+530
+531
+532
+533
+534
+535
+536
+537
+538
+539
+540
+541
+542
+543
+544
+545
+546
+547
+548
+549
+550
def show(
    self,
    mol: dm.Mol,
    features: pd.DataFrame = None,
    alpha: float = 1.0,
    sphere_radius: float = 0.4,
    show_legend: bool = True,
):
    """Display a molecule in 3D together with its pharmacophoric features.

    Args:
        mol: the molecule of interest
        features: Optional dataframe of features to display. When not provided,
            the features are computed with `get_features`.
        alpha: Alpha value for the colors (currently not working).
        sphere_radius: Radius of the spheres for the features.
        show_legend: Display the legend (the layout is bad but at least it
            shows the legend).

    Returns:
        Whatever `viz.show_pharm_features` returns.
    """

    # Fall back to the features of the molecule when none are given
    displayed_features = self.get_features(mol) if features is None else features

    display_options = dict(
        features=displayed_features,
        feature_factory=self.feature_factory,
        alpha=alpha,
        sphere_radius=sphere_radius,
        show_legend=show_legend,
    )
    return viz.show_pharm_features(mol, **display_options)
+
+
+
+ +
+ + +
+ + + + +

+ show_many(mols, align=True, conformer_id=-1, copy=True, min_samples_ratio=0.5, eps=2, alpha=1.0, sphere_radius=0.4, show_legend=True) + +

+ + +
+ +

Show a 3D view of a list of molecules with their clustered (consensus) pharmacophoric features.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + List[Mol] + +
+

a list of molecules.

+
+
+ required +
align + bool + +
+

Whether to align the conformers of the molecules.

+
+
+ True +
conformer_id + int + +
+

Optional conformer id.

+
+
+ -1 +
copy + bool + +
+

Whether to copy the molecules before clustering.

+
+
+ True +
min_samples_ratio + float + +
+

Percentages of mols that must contain a pharmacophoric point +to be considered as a core point.

+
+
+ 0.5 +
eps + float + +
+

The maximum distance between two samples for one to be considered as +in the neighborhood of the other.

+
+
+ 2 +
alpha + float + +
+

Alpha value for the colors (currently not working).

+
+
+ 1.0 +
sphere_radius + float + +
+

Radius of the spheres for the features.

+
+
+ 0.4 +
show_legend + bool + +
+

Display the legend (the layout is bad but at least it +shows the legend).

+
+
+ True +
+ +
+ Source code in molfeat/calc/pharmacophore.py +
552
+553
+554
+555
+556
+557
+558
+559
+560
+561
+562
+563
+564
+565
+566
+567
+568
+569
+570
+571
+572
+573
+574
+575
+576
+577
+578
+579
+580
+581
+582
+583
+584
+585
+586
+587
+588
+589
+590
+591
+592
+593
+594
+595
+596
+597
+598
+599
+600
+601
+602
+603
+604
+605
+606
+607
def show_many(
    self,
    mols: List[dm.Mol],
    align: bool = True,
    conformer_id: int = -1,
    copy: bool = True,
    min_samples_ratio: float = 0.5,
    eps: float = 2,
    alpha: float = 1.0,
    sphere_radius: float = 0.4,
    show_legend: bool = True,
):
    """Display a list of molecules in 3D together with their clustered
    pharmacophoric features.

    Args:
        mols: a list of molecules.
        align: Whether to align the conformers of the molecules.
        conformer_id: Optional conformer id.
        copy: Whether to copy the molecules before clustering.
        min_samples_ratio: Percentages of mols that must contain a pharmacophoric point
            to be considered as a core point.
        eps: The maximum distance between two samples for one to be considered as
            in the neighborhood of the other.
        alpha: Alpha value for the colors (currently not working).
        sphere_radius: Radius of the spheres for the features.
        show_legend: Display the legend (the layout is bad but at least it
            shows the legend).

    Returns:
        Whatever `viz.show_pharm_features` returns.
    """

    # Features of every molecule; the (possibly copied/aligned) molecules are
    # kept in the dataframe so they can be displayed afterwards.
    all_features = self.get_features_from_many(
        mols,
        align=align,
        conformer_id=conformer_id,
        copy=copy,
        keep_mols=True,
    )

    # One molecule per `mol_index`, taken from the dataframe
    per_mol = all_features.groupby("mol_index").first()
    displayed_mols = per_mol["mol"].tolist()

    # Consensus features shared across the molecules
    consensus_features = self.cluster_features(
        all_features, min_samples_ratio=min_samples_ratio, eps=eps
    )

    display_options = dict(
        features=consensus_features,
        feature_factory=self.feature_factory,
        alpha=alpha,
        sphere_radius=sphere_radius,
        show_legend=show_legend,
    )
    return viz.show_pharm_features(displayed_mols, **display_options)
+
+
+
+ +
+ + + +
+ +
+ +
+ + + +
+ + + + +

+ get_feature_factory(factory) + +

+ + +
+ +

Build a feature factory.

+ +
+ Source code in molfeat/calc/pharmacophore.py +
630
+631
+632
+633
+634
+635
+636
+637
+638
+639
+640
+641
+642
+643
+644
+645
+646
+647
+648
+649
+650
+651
+652
+653
+654
+655
+656
+657
+658
+659
+660
+661
+662
def get_feature_factory(
    factory: Union[str, MolChemicalFeatureFactory]
) -> MolChemicalFeatureFactory:
    """Build a feature factory.

    Args:
        factory: Either an existing `MolChemicalFeatureFactory`, which is returned
            as is, one of the names `"pmapper"`, `"gobbi"`, `"cats"` or
            `"default"`, or the path to an existing feature definition file.

    Returns:
        The corresponding feature factory.

    Raises:
        ValueError: if `factory` is none of the supported values.
    """

    # Already a factory: nothing to build
    if isinstance(factory, MolChemicalFeatureFactory):
        return factory

    if factory == "pmapper":
        # Feature definitions shipped with the pmapper package
        with pkg_resources.path("pmapper", "smarts_features.fdef") as fdef_path:
            return ChemicalFeatures.BuildFeatureFactory(str(fdef_path))  # type: ignore

    if factory == "gobbi":
        return Gobbi_Pharm2D.factory.featFactory

    if factory == "cats":
        # Feature definitions shipped in the molfeat.data package
        with pkg_resources.open_text("molfeat.data", "cats_features.fdef") as handle:
            return ChemicalFeatures.BuildFeatureFactoryFromString(handle.read())  # type: ignore

    if factory == "default":
        # Base feature definition file located in the RDKit data directory
        base_fdef = os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef")
        return ChemicalFeatures.BuildFeatureFactory(base_fdef)  # type: ignore

    if dm.fs.exists(factory):
        # Any other value is treated as the path of a feature definition file
        with fsspec.open(factory, "r") as handle:
            fdef_content = handle.read()
            return ChemicalFeatures.BuildFeatureFactoryFromString(fdef_content)  # type: ignore

    raise ValueError(f"The factory '{factory}' is not supported.")
+
+
+
+ +
+ + +
+ + + + +

+ get_sig_factory(factory, useCounts=None, minPointCount=None, maxPointCount=None, shortestPathsOnly=None, includeBondOrder=None, skipFeats=None, trianglePruneBins=None, bins=None, init_factory=True) + +

+ + +
+ +

Build a signature factory.

+ +
+ Source code in molfeat/calc/pharmacophore.py +
665
+666
+667
+668
+669
+670
+671
+672
+673
+674
+675
+676
+677
+678
+679
+680
+681
+682
+683
+684
+685
+686
+687
+688
+689
+690
+691
+692
+693
+694
+695
+696
+697
+698
+699
+700
+701
+702
+703
+704
+705
def get_sig_factory(
    factory: Union[str, MolChemicalFeatureFactory],
    useCounts: bool = None,
    minPointCount: int = None,
    maxPointCount: int = None,
    shortestPathsOnly: bool = None,
    includeBondOrder: bool = None,
    skipFeats: List[str] = None,
    trianglePruneBins: bool = None,
    bins: List[Tuple[int, int]] = None,
    init_factory: bool = True,
):
    """Build a signature factory.

    Args:
        factory: Value forwarded to both `get_feature_factory` and
            `get_sig_factory_params`.
        useCounts, minPointCount, maxPointCount, shortestPathsOnly, includeBondOrder,
        skipFeats, trianglePruneBins: Optional overrides of the default signature
            factory parameters. A value of None keeps the default.
        bins: Optional distance bins overriding the default ones.
        init_factory: Whether to call `Init()` on the signature factory before
            returning it.

    Returns:
        The signature factory.
    """

    feature_factory = get_feature_factory(factory)

    # Resolve the parameters: defaults of the factory, overridden where requested
    requested = dict(
        useCounts=useCounts,
        minPointCount=minPointCount,
        maxPointCount=maxPointCount,
        shortestPathsOnly=shortestPathsOnly,
        includeBondOrder=includeBondOrder,
        skipFeats=skipFeats,
        trianglePruneBins=trianglePruneBins,
        bins=bins,
    )
    params, bins = get_sig_factory_params(factory, **requested)

    # Build the signature factory and set its distance bins
    sig_factory = SigFactory(feature_factory, **params)
    sig_factory.SetBins(bins)

    if init_factory:
        sig_factory.Init()

    return sig_factory
+
+
+
+ +
+ + +
+ + + + +

+ get_sig_factory_params(factory_name, useCounts=None, minPointCount=None, maxPointCount=None, shortestPathsOnly=None, includeBondOrder=None, skipFeats=None, trianglePruneBins=None, bins=None) + +

+ + +
+ +

Get the default parameters for a given sig factory, allowing some of them to be overridden.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
factory_name + str + +
+

The name of the factory.

+
+
+ required +
+ +
+ Source code in molfeat/calc/pharmacophore.py +
708
+709
+710
+711
+712
+713
+714
+715
+716
+717
+718
+719
+720
+721
+722
+723
+724
+725
+726
+727
+728
+729
+730
+731
+732
+733
+734
+735
+736
+737
+738
+739
+740
+741
+742
+743
+744
+745
+746
+747
+748
+749
+750
+751
+752
+753
+754
+755
+756
+757
+758
+759
+760
+761
+762
+763
+764
+765
+766
+767
+768
+769
+770
+771
+772
+773
+774
+775
+776
+777
+778
+779
+780
+781
+782
+783
+784
+785
+786
+787
+788
+789
+790
+791
+792
+793
+794
+795
+796
+797
+798
+799
+800
+801
+802
+803
+804
+805
+806
+807
+808
+809
+810
def get_sig_factory_params(
    factory_name: str,
    useCounts: bool = None,
    minPointCount: int = None,
    maxPointCount: int = None,
    shortestPathsOnly: bool = None,
    includeBondOrder: bool = None,
    skipFeats: List[str] = None,
    trianglePruneBins: bool = None,
    bins: List[Tuple[int, int]] = None,
) -> Tuple[Dict[str, Any], list]:
    """Return the default signature factory parameters of a known factory,
    with optional overrides.

    Args:
        factory_name: The name of the factory. One of `"cats"`, `"gobbi"`,
            `"pmapper"` or `"default"`.
        useCounts, minPointCount, maxPointCount, shortestPathsOnly, includeBondOrder,
        skipFeats, trianglePruneBins: When not None, replace the default value of
            the parameter of the same name.
        bins: When set (and not empty), used instead of the default bins.

    Returns:
        A tuple `(params, bins)` with the keyword parameters of the signature
        factory and the distance bins.

    Raises:
        ValueError: if `factory_name` has no known defaults.
    """

    # Distance bins shared by every preset except "cats"
    wide_bins = [(2, 3), (3, 4), (4, 5), (5, 6), (6, 7), (7, 8), (8, 100)]

    # Preset name -> (default parameters, default bins)
    presets = {
        "cats": (
            {
                "useCounts": True,
                "minPointCount": 2,
                "maxPointCount": 2,
                "trianglePruneBins": True,
                "shortestPathsOnly": True,
                "includeBondOrder": False,
            },
            [(low, low + 1) for low in range(9)],
        ),
        "gobbi": (
            {
                "useCounts": False,
                "minPointCount": 2,
                "maxPointCount": 3,
                "trianglePruneBins": True,
                "shortestPathsOnly": True,
                "includeBondOrder": False,
            },
            wide_bins,
        ),
        "pmapper": (
            {
                "useCounts": False,
                "minPointCount": 2,
                "maxPointCount": 3,
                "trianglePruneBins": False,
                "shortestPathsOnly": True,
                "includeBondOrder": False,
            },
            wide_bins,
        ),
        "default": (
            {
                "useCounts": False,
                "minPointCount": 2,
                "maxPointCount": 3,
                "trianglePruneBins": False,
                "shortestPathsOnly": True,
                "skipFeats": ["ZnBinder", "LumpedHydrophobe"],
                "includeBondOrder": False,
            },
            wide_bins,
        ),
    }

    # Compare with `==` in a fixed order rather than hashing `factory_name`
    for preset_name, (preset_params, preset_bins) in presets.items():
        if factory_name == preset_name:
            params, default_bins = preset_params, preset_bins
            break
    else:
        raise ValueError(f"Default values for {factory_name} are not known.")

    # Apply the overrides that were explicitly provided
    overrides = {
        "minPointCount": minPointCount,
        "maxPointCount": maxPointCount,
        "trianglePruneBins": trianglePruneBins,
        "includeBondOrder": includeBondOrder,
        "useCounts": useCounts,
        "skipFeats": skipFeats,
        "shortestPathsOnly": shortestPathsOnly,
    }
    for key, value in overrides.items():
        if value is not None:
            params[key] = value  # type: ignore

    bins = bins or default_bins

    return params, bins
+
+
+
+ +
+ + + +
+ +
+ +

+

Scaffold Keys

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ ScaffoldKeyCalculator + + +

+ + +
+

+ Bases: SerializableCalculator

+ + +

Implementation of the Scaffold Keys described in +Identification of Bioisosteric Scaffolds using Scaffold Keys by Peter Ertl

+ +
+ Source code in molfeat/calc/skeys.py +
104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
+449
+450
+451
+452
+453
+454
+455
+456
+457
+458
+459
+460
+461
+462
+463
+464
+465
+466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
+480
+481
+482
+483
+484
+485
+486
+487
+488
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498
+499
+500
+501
+502
+503
+504
+505
+506
+507
+508
+509
+510
+511
+512
+513
+514
+515
+516
+517
+518
+519
+520
+521
+522
+523
+524
+525
+526
+527
+528
+529
+530
+531
+532
+533
+534
+535
+536
+537
+538
+539
+540
+541
+542
+543
+544
+545
+546
+547
+548
+549
+550
+551
+552
+553
class ScaffoldKeyCalculator(SerializableCalculator):
    """
    Implementation of the Scaffold Keys described in
    `Identification of Bioisosteric Scaffolds using Scaffold Keys` by Peter Ertl

    Every entry of `DESCRIPTORS` is the name of a method of this class that computes
    one key. `__call__` evaluates them in that order and returns the resulting vector.
    """

    DESCRIPTORS = [
        "n_atom_in_rings",
        "n_atom_in_conjugated_ring",
        "n_atoms_not_in_conjugated_ring",
        "n_atom_in_chain",
        "n_atom_exocyclic",
        "n_nitrogen",
        "n_nitrogen_in_ring",
        "n_oxygen",
        "n_oxygen_in_ring",
        "n_sulfur",
        "n_heteroatoms",
        "n_heteroatoms_in_ring",
        "n_atom_spiro_atoms",
        "n_heteroatom_more_than_2_conn",
        "n_carbon_atleast_2_heteroatoms",
        "n_atom_at_least_2_nei_more_than_2_conn",
        "abs_scaffold_format_charge",
        "n_bonds",
        "n_multiple_non_conj_ring_bonds",
        "n_bonds_2_heteroatoms",
        "n_carbon_het_carbon_het_bonds",
        "n_bonds_at_least_3_conn",
        "n_exocyclic_single_bonds_carbon",
        "n_exocyclic_single_bonds_nitrogen",
        "n_non_ring_bonds_2_conj_rings",
        "n_non_ring_bonds_conj_nonconj_rings",
        "n_bonds_atoms_with_at_least_one_nei_with_2_conn",
        "n_simple_rings",
        "size_largest_ring",
        "n_simple_rings_no_heteroatoms",
        "n_simple_rings_1_heteroatoms",
        "n_simple_rings_2_heteroatoms",
        "n_simple_rings_at_least_3_heteroatoms",
        "n_simple_non_conj_5_atoms_rings",
        "n_simple_non_conj_6_atoms_rings",
        "n_ring_system",
        "n_ring_system_with_2_non_conj_simple_ring",
        "n_ring_system_with_2_conj_simple_ring",
        "n_ring_system_with_conj_non_conj_simple_ring",
        "n_ring_system_with_3_conj_simple_ring",
        "n_ring_system_with_3_non_conj_simple_ring",
        "n_ring_system_with_greater_one_conj_nonconj_simple_ring",
    ]

    # Normalization table, loaded once when the class is defined and re-ordered so
    # that its rows follow DESCRIPTORS. Its "mean" and "std" columns are consumed by
    # `compute_normalization`.
    # NOTE(review): keys 9, 15 and 16 were corrected to match their descriptions;
    # the statistics stored in skey_parameters.csv may have been produced with the
    # previous behaviour -- confirm whether they need to be regenerated.
    NORM_PARAMS = pd.read_csv(
        Path(molfeat.__file__).parents[0].joinpath("data/skey_parameters.csv"),
        index_col=0,
    ).loc[DESCRIPTORS]

    def __init__(
        self, normalize: bool = False, verbose: bool = False, use_scaffold: bool = False, **kwargs
    ):
        """
        Init of the scaffold key function

        Args:
            normalize: whether to normalize the value of the feature
            verbose: whether to log errors
            use_scaffold: whether to convert the molecule into scaffold first
            **kwargs: accepted for signature compatibility; not used by this class
        """
        self.normalize = normalize
        self.verbose = verbose
        self.use_scaffold = use_scaffold

    def __getstate__(self):
        """Get state of the scaffold key function (the three constructor flags)"""
        return {
            "normalize": self.normalize,
            "verbose": self.verbose,
            "use_scaffold": self.use_scaffold,
        }

    def __len__(self):
        """Number of scaffold keys computed by this calculator"""
        return len(self.DESCRIPTORS)

    @classmethod
    def compute_normalization(cls, features: np.ndarray):
        """Normalize input features. The normalization parameters are
        computed from the scaffolds of 2.1M molecules from CHEMBL 29.

        Args:
            features: raw scaffold keys, in `DESCRIPTORS` order

        Returns:
            `(features - mean) / std` using the columns of `NORM_PARAMS`.
            NOTE(review): the operands are pandas columns, so the result is presumably
            a pandas Series rather than a bare ndarray -- confirm against callers.
        """
        return (features - cls.NORM_PARAMS["mean"]) / cls.NORM_PARAMS["std"]

    def n_atom_in_rings(self, mol: dm.Mol):
        """1. number of ring atoms"""
        sm = dm.from_smarts("[r]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_atom_in_conjugated_ring(self, mol: dm.Mol):
        """2. number of atoms in conjugated rings

        Ring sizes are summed per ring, so an atom shared by two conjugated rings
        contributes once for each of them.
        """
        ri = mol.GetRingInfo()
        return sum(len(ring) for ring in ri.AtomRings() if _is_ring_fully_conjugated(mol, ring))

    def n_atoms_not_in_conjugated_ring(self, mol: dm.Mol):
        """
        3. number of atoms not in conjugated rings
        (i.e. atoms in aliphatic rings and non-ring atoms)
        """
        # EN: replace conjugation by aromatic
        # NOTE(review): only atoms of rings that are not fully conjugated are summed
        # here; non-ring atoms are not added despite the description -- confirm intent.
        ri = mol.GetRingInfo()
        return sum(
            len(ring) for ring in ri.AtomRings() if not _is_ring_fully_conjugated(mol, ring)
        )

    def n_atom_in_chain(self, mol: dm.Mol):
        """4. number atoms in chains (not counting double-connected exo-chain atoms)"""
        sm = dm.from_smarts("[!r;!$(*=[r])]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_atom_exocyclic(self, mol: dm.Mol):
        """5. number of exocyclic atoms (connected by multiple bonds to a ring)"""
        # non-ring atom, bonded to a ring atom, but not through a single bond
        sm = dm.from_smarts("[!r;!$(*-[r])&$(*~[r])]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_nitrogen(self, mol: dm.Mol):
        """6. number of nitrogen"""
        sm = dm.from_smarts("[#7]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_nitrogen_in_ring(self, mol: dm.Mol):
        """7. number of nitrogen in rings"""
        sm = dm.from_smarts("[#7;r]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_oxygen(self, mol: dm.Mol):
        """8. number of oxygen"""
        sm = dm.from_smarts("[#8]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_oxygen_in_ring(self, mol: dm.Mol):
        """9. number of oxygen in rings"""
        # the ring primitive is required here: a bare "[#8]" would also count
        # chain oxygens and make this key identical to `n_oxygen`
        sm = dm.from_smarts("[#8;r]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_sulfur(self, mol: dm.Mol):
        """10. number of sulfur atoms"""
        sm = dm.from_smarts("[#16]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_heteroatoms(self, mol: dm.Mol):
        """11. number of heteroatoms (atoms that are neither hydrogen nor carbon)"""
        sm = dm.from_smarts("[!#1&!#6]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_heteroatoms_in_ring(self, mol: dm.Mol):
        """12. number of heteroatoms in rings"""
        sm = dm.from_smarts("[!#1&!#6&r]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_atom_spiro_atoms(self, mol: dm.Mol):
        """13. number of spiro atoms"""
        return Desc.CalcNumSpiroAtoms(mol)

    def n_heteroatom_more_than_2_conn(self, mol: dm.Mol):
        """14. number of heteroatoms with more than 2 connections"""
        # degree 0, 1 and 2 are excluded explicitly
        sm = dm.from_smarts("[!#1;!#6;!D1!D0;!D2]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_carbon_atleast_2_heteroatoms(self, mol: dm.Mol):
        """15. number of carbon atoms connected to at least 2 heteroatoms"""
        n_atoms = 0
        for atom in mol.GetAtoms():
            # only carbon centres are counted by this key
            if atom.GetAtomicNum() != 6:
                continue
            hetero_nei = [x for x in atom.GetNeighbors() if x.GetAtomicNum() not in [1, 6]]
            n_atoms += len(hetero_nei) >= 2
        return n_atoms

    def n_atom_at_least_2_nei_more_than_2_conn(self, mol: dm.Mol):
        """16. Number of atoms where at least 2 connected atoms have more than 2 connections"""
        n_atoms = 0
        for atom in mol.GetAtoms():
            branched_nei = [x for x in atom.GetNeighbors() if len(x.GetNeighbors()) > 2]
            # "at least 2" neighbors, hence >= and not >
            n_atoms += len(branched_nei) >= 2
        return n_atoms

    def abs_scaffold_format_charge(self, mol: dm.Mol):
        """17. absolute value of the scaffold formal charge"""
        charge = GetFormalCharge(mol)
        return abs(charge)

    def n_bonds(self, mol: dm.Mol):
        """18. number of bonds"""
        return mol.GetNumBonds()

    def n_multiple_non_conj_ring_bonds(self, mol: dm.Mol):
        """19. number of multiple, nonconjugated ring bonds

        Multiple bonds are summed over the rings returned by `Chem.GetSymmSSSR`
        that are not fully conjugated.
        """
        rings = Chem.GetSymmSSSR(mol)
        extracted_rings = [list(rings[i]) for i in range(len(rings))]
        nr_multiple_bonds_infcr = 0  # infcr: in not fully conjugated ring
        for ring in extracted_rings:
            if not _is_ring_fully_conjugated(mol, ring):
                nr_multiple_bonds_infcr += _n_multiple_bond_in_ring(mol, ring)
        return nr_multiple_bonds_infcr

    def n_bonds_2_heteroatoms(self, mol: dm.Mol):
        """20. number of bonds connecting 2 heteroatoms"""
        sm = dm.from_smarts("[!#1&!#6]~[!#1&!#6]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_carbon_het_carbon_het_bonds(self, mol: dm.Mol):
        """21. number of bonds connecting 2 heteroatoms through 2 carbons"""
        sm = dm.from_smarts("[!#1&!#6]~[#6]~[#6]~[!#1&!#6]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_bonds_at_least_3_conn(self, mol: dm.Mol):
        """22. number of bonds with at least 3 connections on both its atoms"""
        sm = dm.from_smarts("[$([!#1](~[!#1])(~[!#1])~[!#1])][$([!#1](~[!#1])(~[!#1])~[!#1])]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_exocyclic_single_bonds_carbon(self, mol: dm.Mol):
        """23. number of exocyclic single bonds where a ring atom is carbon"""
        sm = dm.from_smarts("[!R;!#1]-[#6;R]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_exocyclic_single_bonds_nitrogen(self, mol: dm.Mol):
        """24. number of exocyclic single bonds where a ring atom is nitrogen"""
        sm = dm.from_smarts("[!R;!#1]-[#7;R]")
        return len(mol.GetSubstructMatches(sm, uniquify=True))

    def n_non_ring_bonds_2_conj_rings(self, mol: dm.Mol):
        """25. number of non-ring bonds connecting 2 nonconjugated rings"""
        # EN: this is interpretated literally as bonds and not path
        ring_atom_conj_state = _ring_atom_state(mol)
        sm = dm.from_smarts("[R:1]!@[R:2]")
        bond_list = mol.GetSubstructMatches(sm, uniquify=True)
        result = 0
        for a_start, a_end in bond_list:
            s_state = ring_atom_conj_state.get(a_start)
            e_state = ring_atom_conj_state.get(a_end)
            # both ends must carry a "not conjugated" (False) ring state
            if False in s_state and False in e_state:
                result += 1
        return result

    def n_non_ring_bonds_conj_nonconj_rings(self, mol: dm.Mol):
        """
        26. number of non-ring bonds connecting 2 rings,
        one of them conjugated and one non-conjugated
        """
        # EN: this is interpretated literally as bonds and not path

        ring_atom_conj_state = _ring_atom_state(mol)
        sm = dm.from_smarts("[R:1]!@[R:2]")
        bond_list = mol.GetSubstructMatches(sm, uniquify=True)
        result = 0
        for a_start, a_end in bond_list:
            s_state = ring_atom_conj_state.get(a_start)
            e_state = ring_atom_conj_state.get(a_end)
            if (True in s_state and False in e_state) or (False in s_state and True in e_state):
                result += 1
        return result

    def n_bonds_atoms_with_at_least_one_nei_with_2_conn(self, mol: dm.Mol):
        """
        27. number of bonds where both atoms have at least one neighbor
        (not considering the bond atoms) with more than 2 connections
        """
        hub_query = dm.from_smarts("[*;!D0;!D1;!D2]")
        # indices of every atom with more than 2 connections; a set keeps the
        # membership tests below constant-time
        hub_atoms = {
            idx for match in mol.GetSubstructMatches(hub_query, uniquify=True) for idx in match
        }
        result = 0
        for bond in mol.GetBonds():
            a_start, a_end = bond.GetBeginAtom(), bond.GetEndAtom()
            # we need to exclude the bond atoms themselves
            bond_pair = (a_start.GetIdx(), a_end.GetIdx())
            start_ok = any(
                x.GetIdx() in hub_atoms and x.GetIdx() not in bond_pair
                for x in a_start.GetNeighbors()
            )
            end_ok = any(
                y.GetIdx() in hub_atoms and y.GetIdx() not in bond_pair
                for y in a_end.GetNeighbors()
            )
            if start_ok and end_ok:
                result += 1
        return result

    def n_simple_rings(self, mol: dm.Mol):
        """28. number of simple rings"""
        ri = mol.GetRingInfo()
        return ri.NumRings()

    def size_largest_ring(self, mol: dm.Mol):
        """29. Size of the largest ring (0 when the molecule has no ring)"""
        ri = mol.GetRingInfo()
        max_ring_size = max((len(r) for r in ri.AtomRings()), default=0)
        return max_ring_size

    def n_simple_rings_no_heteroatoms(self, mol: dm.Mol):
        """30. number of simple rings with no heteroatoms"""
        ri = mol.GetRingInfo()
        n_heteros = _count_heteroatom_per_ring(mol, ri.AtomRings())
        return sum(1 for x in n_heteros if x == 0)

    def n_simple_rings_1_heteroatoms(self, mol: dm.Mol):
        """31. number of simple rings with 1 heteroatom"""
        ri = mol.GetRingInfo()
        n_heteros = _count_heteroatom_per_ring(mol, ri.AtomRings())
        return sum(1 for x in n_heteros if x == 1)

    def n_simple_rings_2_heteroatoms(self, mol: dm.Mol):
        """32. number of simple rings with 2 heteroatom"""
        ri = mol.GetRingInfo()
        n_heteros = _count_heteroatom_per_ring(mol, ri.AtomRings())
        return sum(1 for x in n_heteros if x == 2)

    def n_simple_rings_at_least_3_heteroatoms(self, mol: dm.Mol):
        """33. number of simple rings with 3 or more heteroatoms"""
        ri = mol.GetRingInfo()
        n_heteros = _count_heteroatom_per_ring(mol, ri.AtomRings())
        return sum(1 for x in n_heteros if x >= 3)

    def n_simple_non_conj_5_atoms_rings(self, mol: dm.Mol):
        """34. number of simple non-conjugated rings with 5 atoms"""
        ri = mol.GetRingInfo()
        n = 0
        for ring in ri.AtomRings():
            if not _is_ring_fully_conjugated(mol, ring) and len(ring) == 5:
                n += 1
        return n

    def n_simple_non_conj_6_atoms_rings(self, mol: dm.Mol):
        """35. number of simple non-conjugated rings with 6 atoms"""
        ri = mol.GetRingInfo()
        n = 0
        for ring in ri.AtomRings():
            if not _is_ring_fully_conjugated(mol, ring) and len(ring) == 6:
                n += 1
        return n

    def n_ring_system(self, mol: dm.Mol):
        """36. number of ring systems"""
        _, ring_system, _ = _get_ring_system(mol)
        return len(ring_system)

    def n_ring_system_with_2_non_conj_simple_ring(self, mol: dm.Mol):
        """37. number of rings systems with 2 non-conjugated simple rings"""
        simple_rings, _, ring_map = _get_ring_system(mol)
        # simple-ring index -> whether that ring is fully conjugated
        conj_rings_map = dict(
            (i, _is_ring_fully_conjugated(mol, x)) for i, x in enumerate(simple_rings)
        )
        result = 0
        for ring_set in ring_map:
            n_not_conj = sum(not conj_rings_map[rnum] for rnum in ring_set)
            result += n_not_conj == 2
        return result

    def n_ring_system_with_2_conj_simple_ring(self, mol: dm.Mol):
        """38. number of rings systems with 2 conjugated simple rings"""
        simple_rings, _, ring_map = _get_ring_system(mol)
        conj_rings_map = dict(
            (i, _is_ring_fully_conjugated(mol, x)) for i, x in enumerate(simple_rings)
        )
        result = 0
        for ring_set in ring_map:
            n_conj = sum(conj_rings_map[rnum] for rnum in ring_set)
            result += n_conj == 2
        return result

    def n_ring_system_with_conj_non_conj_simple_ring(self, mol: dm.Mol):
        """39. number of ring system containing 2 simple rings, one conjugated and one nonconjugated"""
        simple_rings, _, ring_map = _get_ring_system(mol)
        conj_rings_map = dict(
            (i, _is_ring_fully_conjugated(mol, x)) for i, x in enumerate(simple_rings)
        )
        result = 0
        for ring_set in ring_map:
            if len(ring_set) == 2:
                n_conj = sum(conj_rings_map[rnum] for rnum in ring_set)
                result += n_conj == 1
        return result

    def n_ring_system_with_3_conj_simple_ring(self, mol: dm.Mol):
        """40. number of rings systems with 3 conjugated simple rings"""
        simple_rings, _, ring_map = _get_ring_system(mol)
        conj_rings_map = dict(
            (i, _is_ring_fully_conjugated(mol, x)) for i, x in enumerate(simple_rings)
        )
        result = 0
        for ring_set in ring_map:
            n_conj = sum(conj_rings_map[rnum] for rnum in ring_set)
            result += n_conj == 3
        return result

    def n_ring_system_with_3_non_conj_simple_ring(self, mol: dm.Mol):
        """41. number of rings systems with 3 non-conjugated simple rings"""
        simple_rings, _, ring_map = _get_ring_system(mol)
        conj_rings_map = dict(
            (i, _is_ring_fully_conjugated(mol, x)) for i, x in enumerate(simple_rings)
        )
        result = 0
        for ring_set in ring_map:
            n_not_conj = sum(not conj_rings_map[rnum] for rnum in ring_set)
            result += n_not_conj == 3
        return result

    def n_ring_system_with_greater_one_conj_nonconj_simple_ring(self, mol: dm.Mol):
        """42. number of ring system containing 3 simple rings, at least one conjugated and one nonconjugated"""
        simple_rings, _, ring_map = _get_ring_system(mol)
        conj_rings_map = dict(
            (i, _is_ring_fully_conjugated(mol, x)) for i, x in enumerate(simple_rings)
        )
        result = 0
        for ring_set in ring_map:
            if len(ring_set) == 3:
                n_conj = sum(conj_rings_map[rnum] for rnum in ring_set)
                # 1 or 2 conjugated rings out of 3 means both kinds are present
                result += n_conj in [1, 2]
        return result

    @property
    def columns(self):
        """Get the name of all the descriptors of this calculator"""
        return list(self.DESCRIPTORS)

    def __call__(self, mol: Union[dm.Mol, str]):
        r"""
        Compute the Fingerprint of a molecule

        Args:
            mol: the molecule of interest

        Returns:
            props (np.ndarray): one value per entry of `DESCRIPTORS`. A descriptor
                whose computation raises is reported as NaN (and logged when
                `verbose` is set). When `normalize` is set, the output of
                `compute_normalization` is returned instead.
        """
        mol = dm.to_mol(mol)
        if self.use_scaffold and mol is not None:
            mol = MurckoScaffold.GetScaffoldForMol(mol)

        props = []
        for k in self.DESCRIPTORS:
            try:
                fn = getattr(self, k)
                props.append(fn(mol))
            except Exception as e:
                # a single failing key must not abort the whole vector
                if self.verbose:
                    logger.error(e)
                props.append(float("nan"))
        props = np.asarray(props)
        if self.normalize:
            return self.compute_normalization(props)
        return props
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ columns + + + property + + +

+ + +
+ +

Get the name of all the descriptors of this calculator

+
+ +
+ + + + +
+ + + + +

+ __call__(mol) + +

+ + +
+ +

Compute the Fingerprint of a molecule

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Union[Mol, str] + +
+

the molecule of interest

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
props + ndarray + +
+

list of computed rdkit molecular descriptors

+
+
+ +
+ Source code in molfeat/calc/skeys.py +
527
+528
+529
+530
+531
+532
+533
+534
+535
+536
+537
+538
+539
+540
+541
+542
+543
+544
+545
+546
+547
+548
+549
+550
+551
+552
+553
def __call__(self, mol: Union[dm.Mol, str]):
    r"""
    Evaluate every scaffold key on a molecule

    Args:
        mol: the molecule of interest

    Returns:
        props (np.ndarray): list of computed descriptors, in `DESCRIPTORS` order;
            a descriptor that raises is stored as NaN
    """
    mol = dm.to_mol(mol)
    if mol is not None and self.use_scaffold:
        mol = MurckoScaffold.GetScaffoldForMol(mol)

    def _evaluate(name):
        # failures are turned into NaN so the remaining keys are still computed
        try:
            return getattr(self, name)(mol)
        except Exception as e:
            if self.verbose:
                logger.error(e)
            return float("nan")

    props = np.asarray([_evaluate(name) for name in self.DESCRIPTORS])
    return self.compute_normalization(props) if self.normalize else props
+
+
+
+ +
+ + +
+ + + + +

+ __getstate__() + +

+ + +
+ +

Get state of the scaffold key function

+ +
+ Source code in molfeat/calc/skeys.py +
175
+176
+177
+178
+179
+180
+181
def __getstate__(self):
+    """Get state of the scaffold key function"""
+    state = {}
+    state["normalize"] = self.normalize
+    state["verbose"] = self.verbose
+    state["use_scaffold"] = self.use_scaffold
+    return state
+
+
+
+ +
+ + +
+ + + + +

+ __init__(normalize=False, verbose=False, use_scaffold=False, **kwargs) + +

+ + +
+ +

Init of the scaffold key function

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
normalize + bool + +
+

whether to normalize the value of the feature

+
+
+ False +
verbose + bool + +
+

whether to log errors

+
+
+ False +
use_scaffold + bool + +
+

whether to convert the molecule into scaffold first

+
+
+ False +
+ +
+ Source code in molfeat/calc/skeys.py +
160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
def __init__(
+    self, normalize: bool = False, verbose: bool = False, use_scaffold: bool = False, **kwargs
+):
+    """
+    Init of the scaffold key function
+
+    Args:
+        normalize: whether to normalize the value of the feature
+        verbose: whether to log errors
+        use_scaffold: whether to convert the molecule into scaffold first
+    """
+    self.normalize = normalize
+    self.verbose = verbose
+    self.use_scaffold = use_scaffold
+
+
+
+ +
+ + +
+ + + + +

+ abs_scaffold_format_charge(mol) + +

+ + +
+ +
    +
  17. absolute value of the scaffold formal charge
+ +
+ Source code in molfeat/calc/skeys.py +
291
+292
+293
+294
def abs_scaffold_format_charge(self, mol: dm.Mol):
    """17. magnitude of the formal charge reported by `GetFormalCharge`"""
    return abs(GetFormalCharge(mol))
+
+
+
+ +
+ + +
+ + + + +

+ compute_normalization(features) + + + classmethod + + +

+ + +
+ +

Normalize input features. The normalization parameters are computed from the scaffolds of 2.1M molecules from CHEMBL 29.

+ +
+ Source code in molfeat/calc/skeys.py +
186
+187
+188
+189
+190
+191
@classmethod
def compute_normalization(cls, features: np.ndarray):
    """Standardize raw scaffold keys with the `mean` and `std` columns of
    `NORM_PARAMS` (computed from the scaffolds of 2.1M molecules from CHEMBL 29).
    """
    params = cls.NORM_PARAMS
    centered = features - params["mean"]
    return centered / params["std"]
+
+
+
+ +
+ + +
+ + + + +

+ n_atom_at_least_2_nei_more_than_2_conn(mol) + +

+ + +
+ +
    +
  16. Number of atoms where at least 2 connected atoms have more than 2 connections
+ +
+ Source code in molfeat/calc/skeys.py +
283
+284
+285
+286
+287
+288
+289
def n_atom_at_least_2_nei_more_than_2_conn(self, mol: dm.Mol):
    """16. Number of atoms where at least 2 connected atoms have more than 2 connections"""
    n_atoms = 0
    for atom in mol.GetAtoms():
        # neighbors that themselves have more than 2 connections
        branched_nei = [x for x in atom.GetNeighbors() if len(x.GetNeighbors()) > 2]
        # "at least 2" such neighbors: the threshold is inclusive
        n_atoms += len(branched_nei) >= 2
    return n_atoms
+
+
+
+ +
+ + +
+ + + + +

+ n_atom_exocyclic(mol) + +

+ + +
+ +
    +
  5. number of exocyclic atoms (connected by multiple bonds to a ring)
+ +
+ Source code in molfeat/calc/skeys.py +
225
+226
+227
+228
def n_atom_exocyclic(self, mol: dm.Mol):
    """5. count of non-ring atoms attached to a ring atom, excluding those attached by a single bond"""
    exocyclic_query = dm.from_smarts("[!r;!$(*-[r])&$(*~[r])]")
    hits = mol.GetSubstructMatches(exocyclic_query, uniquify=True)
    return len(hits)
+
+
+
+ +
+ + +
+ + + + +

+ n_atom_in_chain(mol) + +

+ + +
+ +
    +
  4. number of atoms in chains (not counting double-connected exo-chain atoms)
+ +
+ Source code in molfeat/calc/skeys.py +
220
+221
+222
+223
def n_atom_in_chain(self, mol: dm.Mol):
    """4. count of non-ring atoms that are not double-bonded to a ring atom"""
    chain_query = dm.from_smarts("[!r;!$(*=[r])]")
    hits = mol.GetSubstructMatches(chain_query, uniquify=True)
    return len(hits)
+
+
+
+ +
+ + +
+ + + + +

+ n_atom_in_conjugated_ring(mol) + +

+ + +
+ +
    +
  2. number of atoms in conjugated rings
+ +
+ Source code in molfeat/calc/skeys.py +
198
+199
+200
+201
+202
+203
+204
+205
def n_atom_in_conjugated_ring(self, mol: dm.Mol):
    """2. summed size of every fully conjugated ring of the molecule"""
    all_rings = mol.GetRingInfo().AtomRings()
    return sum(len(ring) for ring in all_rings if _is_ring_fully_conjugated(mol, ring))
+
+
+
+ +
+ + +
+ + + + +

+ n_atom_in_rings(mol) + +

+ + +
+ +
    +
  1. number of ring atoms
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
193
+194
+195
+196
def n_atom_in_rings(self, mol: dm.Mol):
    """1. count of atoms matching the ring-membership SMARTS `[r]`"""
    ring_atom_query = dm.from_smarts("[r]")
    hits = mol.GetSubstructMatches(ring_atom_query, uniquify=True)
    return len(hits)
+
+
+
+ +
+ + +
+ + + + +

+ n_atom_spiro_atoms(mol) + +

+ + +
+ +
    +
  13. number of spiro atoms
+ +
+ Source code in molfeat/calc/skeys.py +
266
+267
+268
def n_atom_spiro_atoms(self, mol: dm.Mol):
    """13. spiro atom count, as reported by `Desc.CalcNumSpiroAtoms`"""
    n_spiro = Desc.CalcNumSpiroAtoms(mol)
    return n_spiro
+
+
+
+ +
+ + +
+ + + + +

+ n_atoms_not_in_conjugated_ring(mol) + +

+ + +
+ +
    +
  3. number of atoms not in conjugated rings (i.e. atoms in aliphatic rings and non-ring atoms)
+ +
+ Source code in molfeat/calc/skeys.py +
207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
def n_atoms_not_in_conjugated_ring(self, mol: dm.Mol):
    """
    3. number of atoms not in conjugated rings
    (i.e. atoms in aliphatic rings and non-ring atoms)

    Computed as the summed size of every ring that is not fully conjugated.
    """
    # EN: replace conjugation by aromatic
    all_rings = mol.GetRingInfo().AtomRings()
    return sum(len(ring) for ring in all_rings if not _is_ring_fully_conjugated(mol, ring))
+
+
+
+ +
+ + +
+ + + + +

+ n_bonds(mol) + +

+ + +
+ +
    +
  18. number of bonds
+ +
+ Source code in molfeat/calc/skeys.py +
296
+297
+298
def n_bonds(self, mol: dm.Mol):
    """18. bond count, as reported by `mol.GetNumBonds()`"""
    bond_count = mol.GetNumBonds()
    return bond_count
+
+
+
+ +
+ + +
+ + + + +

+ n_bonds_2_heteroatoms(mol) + +

+ + +
+ +
    +
  20. number of bonds connecting 2 heteroatoms
+ +
+ Source code in molfeat/calc/skeys.py +
312
+313
+314
+315
def n_bonds_2_heteroatoms(self, mol: dm.Mol):
    """20. count of bonds whose two atoms are both neither hydrogen nor carbon"""
    het_het_query = dm.from_smarts("[!#1&!#6]~[!#1&!#6]")
    hits = mol.GetSubstructMatches(het_het_query, uniquify=True)
    return len(hits)
+
+
+
+ +
+ + +
+ + + + +

+ n_bonds_at_least_3_conn(mol) + +

+ + +
+ +
    +
  22. number of bonds with at least 3 connections on both its atoms
+ +
+ Source code in molfeat/calc/skeys.py +
322
+323
+324
+325
def n_bonds_at_least_3_conn(self, mol: dm.Mol):
    """22. count of bonds whose two atoms each have at least 3 non-hydrogen connections"""
    branched_pair_query = dm.from_smarts(
        "[$([!#1](~[!#1])(~[!#1])~[!#1])][$([!#1](~[!#1])(~[!#1])~[!#1])]"
    )
    hits = mol.GetSubstructMatches(branched_pair_query, uniquify=True)
    return len(hits)
+
+
+
+ +
+ + +
+ + + + +

+ n_bonds_atoms_with_at_least_one_nei_with_2_conn(mol) + +

+ + +
+ +
    +
  27. number of bonds where both atoms have at least one neighbor (not considering the bond atoms) with more than 2 connections
+ +
+ Source code in molfeat/calc/skeys.py +
369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
def n_bonds_atoms_with_at_least_one_nei_with_2_conn(self, mol: dm.Mol):
    """
    27. number of bonds where both atoms have at least one neighbor
    (not considering the bond atoms) with more than 2 connections
    """
    hub_query = dm.from_smarts("[*;!D0;!D1;!D2]")
    # indices of every atom with more than 2 connections; stored in a set so the
    # per-bond membership tests are constant-time instead of scanning a list
    hub_atoms = {
        idx for match in mol.GetSubstructMatches(hub_query, uniquify=True) for idx in match
    }
    result = 0
    for bond in mol.GetBonds():
        a_start, a_end = bond.GetBeginAtom(), bond.GetEndAtom()
        # we need to exclude the bond atoms themselves
        bond_pair = (a_start.GetIdx(), a_end.GetIdx())
        start_ok = any(
            x.GetIdx() in hub_atoms and x.GetIdx() not in bond_pair
            for x in a_start.GetNeighbors()
        )
        end_ok = any(
            y.GetIdx() in hub_atoms and y.GetIdx() not in bond_pair
            for y in a_end.GetNeighbors()
        )
        if start_ok and end_ok:
            result += 1
    return result
+
+
+
+ +
+ + +
+ + + + +

+ n_carbon_atleast_2_heteroatoms(mol) + +

+ + +
+ +
    +
  15. number of carbon atoms connected to at least 2 heteroatoms
+ +
+ Source code in molfeat/calc/skeys.py +
275
+276
+277
+278
+279
+280
+281
def n_carbon_atleast_2_heteroatoms(self, mol: dm.Mol):
    """15. number of carbon atoms connected to at least 2 heteroatoms"""
    n_atoms = 0
    for atom in mol.GetAtoms():
        # this key is about carbon centres only; skip every other element
        if atom.GetAtomicNum() != 6:
            continue
        # heteroatom neighbors: anything that is neither hydrogen nor carbon
        hetero_nei = [x for x in atom.GetNeighbors() if x.GetAtomicNum() not in [1, 6]]
        n_atoms += len(hetero_nei) >= 2
    return n_atoms
+
+
+
+ +
+ + +
+ + + + +

+ n_carbon_het_carbon_het_bonds(mol) + +

+ + +
+ +
    +
  21. number of bonds connecting 2 heteroatoms through 2 carbons
+ +
+ Source code in molfeat/calc/skeys.py +
317
+318
+319
+320
def n_carbon_het_carbon_het_bonds(self, mol: dm.Mol):
    """21. count of heteroatom-carbon-carbon-heteroatom paths"""
    het_c_c_het_query = dm.from_smarts("[!#1&!#6]~[#6]~[#6]~[!#1&!#6]")
    hits = mol.GetSubstructMatches(het_c_c_het_query, uniquify=True)
    return len(hits)
+
+
+
+ +
+ + +
+ + + + +

+ n_exocyclic_single_bonds_carbon(mol) + +

+ + +
+ +
    +
  23. number of exocyclic single bonds where a ring atom is carbon
+ +
+ Source code in molfeat/calc/skeys.py +
327
+328
+329
+330
def n_exocyclic_single_bonds_carbon(self, mol: dm.Mol):
    """23. count of single bonds between a non-ring heavy atom and a ring carbon"""
    exo_carbon_query = dm.from_smarts("[!R;!#1]-[#6;R]")
    hits = mol.GetSubstructMatches(exo_carbon_query, uniquify=True)
    return len(hits)
+
+
+
+ +
+ + +
+ + + + +

+ n_exocyclic_single_bonds_nitrogen(mol) + +

+ + +
+ +
    +
  24. number of exocyclic single bonds where a ring atom is nitrogen
+ +
+ Source code in molfeat/calc/skeys.py +
332
+333
+334
+335
def n_exocyclic_single_bonds_nitrogen(self, mol: dm.Mol):
    """24. count of single bonds between a non-ring heavy atom and a ring nitrogen"""
    exo_nitrogen_query = dm.from_smarts("[!R;!#1]-[#7;R]")
    hits = mol.GetSubstructMatches(exo_nitrogen_query, uniquify=True)
    return len(hits)
+
+
+
+ +
+ + +
+ + + + +

+ n_heteroatom_more_than_2_conn(mol) + +

+ + +
+ +
    +
  14. number of heteroatoms with more than 2 connections
+ +
+ Source code in molfeat/calc/skeys.py +
270
+271
+272
+273
def n_heteroatom_more_than_2_conn(self, mol: dm.Mol):
    """14. count of heteroatoms whose degree is not 0, 1 or 2"""
    branched_het_query = dm.from_smarts("[!#1;!#6;!D1!D0;!D2]")
    hits = mol.GetSubstructMatches(branched_het_query, uniquify=True)
    return len(hits)
+
+
+
+ +
+ + +
+ + + + +

+ n_heteroatoms(mol) + +

+ + +
+ +
    +
  11. number of heteroatoms
+ +
+ Source code in molfeat/calc/skeys.py +
255
+256
+257
+258
+259
def n_heteroatoms(self, mol: dm.Mol):
    """11. count of atoms that are neither hydrogen nor carbon"""
    heteroatom_query = dm.from_smarts("[!#1&!#6]")
    hits = mol.GetSubstructMatches(heteroatom_query, uniquify=True)
    return len(hits)
+
+
+
+ +
+ + +
+ + + + +

+ n_heteroatoms_in_ring(mol) + +

+ + +
+ +
    +
  12. number of heteroatoms in rings
+ +
+ Source code in molfeat/calc/skeys.py +
261
+262
+263
+264
def n_heteroatoms_in_ring(self, mol: dm.Mol):
    """12. count of ring atoms that are neither hydrogen nor carbon"""
    ring_heteroatom_query = dm.from_smarts("[!#1&!#6&r]")
    hits = mol.GetSubstructMatches(ring_heteroatom_query, uniquify=True)
    return len(hits)
+
+
+
+ +
+ + +
+ + + + +

+ n_multiple_non_conj_ring_bonds(mol) + +

+ + +
+ +
    +
  19. number of multiple, nonconjugated ring bonds
+ +
+ Source code in molfeat/calc/skeys.py +
300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
def n_multiple_non_conj_ring_bonds(self, mol: dm.Mol):
    """19. multiple bonds summed over the SSSR rings that are not fully conjugated"""
    sssr = Chem.GetSymmSSSR(mol)
    # materialize each ring as a plain list of atom indices before inspecting it
    ring_atom_lists = [list(sssr[pos]) for pos in range(len(sssr))]
    total = 0  # multiple bonds found in rings that are not fully conjugated
    for atom_ids in ring_atom_lists:
        if _is_ring_fully_conjugated(mol, atom_ids):
            continue
        total += _n_multiple_bond_in_ring(mol, atom_ids)
    return total
+
+
+
+ +
+ + +
+ + + + +

+ n_nitrogen(mol) + +

+ + +
+ +
    +
  6. number of nitrogen
+ +
+ Source code in molfeat/calc/skeys.py +
230
+231
+232
+233
def n_nitrogen(self, mol: dm.Mol):
+    """6. number of nitrogen"""
+    sm = dm.from_smarts("[#7]")
+    return len(mol.GetSubstructMatches(sm, uniquify=True))
+
+
+
+ +
+ + +
+ + + + +

+ n_nitrogen_in_ring(mol) + +

+ + +
+ +
    +
  1. number of nitrogen in rings
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
235
+236
+237
+238
def n_nitrogen_in_ring(self, mol: dm.Mol):
+    """7. number of nitrogen in rings"""
+    sm = dm.from_smarts("[#7;r]")
+    return len(mol.GetSubstructMatches(sm, uniquify=True))
+
+
+
+ +
+ + +
+ + + + +

+ n_non_ring_bonds_2_conj_rings(mol) + +

+ + +
+ +
    +
  1. number of non-ring bonds connecting 2 nonconjugated rings
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
def n_non_ring_bonds_2_conj_rings(self, mol: dm.Mol):
+    """25. number of non-ring bonds connecting 2 nonconjugated rings"""
+    # EN: this is interpretated literally as bonds and not path
+    ring_atom_conj_state = _ring_atom_state(mol)
+    sm = dm.from_smarts("[R:1]!@[R:2]")
+    bond_list = mol.GetSubstructMatches(sm, uniquify=True)
+    result = 0
+    for a_start, a_end in bond_list:
+        s_state = ring_atom_conj_state.get(a_start)
+        e_state = ring_atom_conj_state.get(a_end)
+        if False in s_state and False in e_state:
+            result += 1
+    return result
+
+
+
+ +
+ + +
+ + + + +

+ n_non_ring_bonds_conj_nonconj_rings(mol) + +

+ + +
+ +
    +
  1. number of non-ring bonds connecting 2 rings, +one of them conjugated and one non-conjugated
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
def n_non_ring_bonds_conj_nonconj_rings(self, mol: dm.Mol):
+    """
+    26. number of non-ring bonds connecting 2 rings,
+    one of them conjugated and one non-conjugated
+    """
+    # EN: this is interpretated literally as bonds and not path
+
+    ring_atom_conj_state = _ring_atom_state(mol)
+    sm = dm.from_smarts("[R:1]!@[R:2]")
+    bond_list = mol.GetSubstructMatches(sm, uniquify=True)
+    result = 0
+    for a_start, a_end in bond_list:
+        s_state = ring_atom_conj_state.get(a_start)
+        e_state = ring_atom_conj_state.get(a_end)
+        if (True in s_state and False in e_state) or (False in s_state and True in e_state):
+            result += 1
+    return result
+
+
+
+ +
+ + +
+ + + + +

+ n_oxygen(mol) + +

+ + +
+ +
    +
  1. number of oxygen
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
240
+241
+242
+243
def n_oxygen(self, mol: dm.Mol):
+    """8. number of oxygen"""
+    sm = dm.from_smarts("[#8]")
+    return len(mol.GetSubstructMatches(sm, uniquify=True))
+
+
+
+ +
+ + +
+ + + + +

+ n_oxygen_in_ring(mol) + +

+ + +
+ +
    +
  1. number of oxygen in rings
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
245
+246
+247
+248
def n_oxygen_in_ring(self, mol: dm.Mol):
+    """9. number of oxygen in rings"""
+    sm = dm.from_smarts("[#8]")
+    return len(mol.GetSubstructMatches(sm, uniquify=True))
+
+
+
+ +
+ + +
+ + + + +

+ n_ring_system(mol) + +

+ + +
+ +
    +
  1. number of ring systems
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
443
+444
+445
+446
def n_ring_system(self, mol: dm.Mol):
+    """36. number of ring systems"""
+    simple_rings, ring_system, _ = _get_ring_system(mol)
+    return len(ring_system)
+
+
+
+ +
+ + +
+ + + + +

+ n_ring_system_with_2_conj_simple_ring(mol) + +

+ + +
+ +
    +
  1. number of ring systems with 2 conjugated simple rings
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
460
+461
+462
+463
+464
+465
+466
+467
+468
+469
+470
def n_ring_system_with_2_conj_simple_ring(self, mol: dm.Mol):
+    """38. number of rings systems with 2 conjugated simple rings"""
+    simple_rings, _, ring_map = _get_ring_system(mol)
+    conj_rings_map = dict(
+        (i, _is_ring_fully_conjugated(mol, x)) for i, x in enumerate(simple_rings)
+    )
+    result = 0
+    for ring_set in ring_map:
+        n_conj = sum(conj_rings_map[rnum] for rnum in ring_set)
+        result += n_conj == 2
+    return result
+
+
+
+ +
+ + +
+ + + + +

+ n_ring_system_with_2_non_conj_simple_ring(mol) + +

+ + +
+ +
    +
  1. number of ring systems with 2 non-conjugated simple rings
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
448
+449
+450
+451
+452
+453
+454
+455
+456
+457
+458
def n_ring_system_with_2_non_conj_simple_ring(self, mol: dm.Mol):
+    """37. number of rings systems with 2 non-conjugated simple rings"""
+    simple_rings, _, ring_map = _get_ring_system(mol)
+    conj_rings_map = dict(
+        (i, _is_ring_fully_conjugated(mol, x)) for i, x in enumerate(simple_rings)
+    )
+    result = 0
+    for ring_set in ring_map:
+        n_not_conj = sum(not conj_rings_map[rnum] for rnum in ring_set)
+        result += n_not_conj == 2
+    return result
+
+
+
+ +
+ + +
+ + + + +

+ n_ring_system_with_3_conj_simple_ring(mol) + +

+ + +
+ +
    +
  1. number of ring systems with 3 conjugated simple rings
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
485
+486
+487
+488
+489
+490
+491
+492
+493
+494
+495
def n_ring_system_with_3_conj_simple_ring(self, mol: dm.Mol):
+    """40. number of rings systems with 3 conjugated simple rings"""
+    simple_rings, _, ring_map = _get_ring_system(mol)
+    conj_rings_map = dict(
+        (i, _is_ring_fully_conjugated(mol, x)) for i, x in enumerate(simple_rings)
+    )
+    result = 0
+    for ring_set in ring_map:
+        n_conj = sum(conj_rings_map[rnum] for rnum in ring_set)
+        result += n_conj == 3
+    return result
+
+
+
+ +
+ + +
+ + + + +

+ n_ring_system_with_3_non_conj_simple_ring(mol) + +

+ + +
+ +
    +
  1. number of ring systems with 3 non-conjugated simple rings
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
497
+498
+499
+500
+501
+502
+503
+504
+505
+506
+507
def n_ring_system_with_3_non_conj_simple_ring(self, mol: dm.Mol):
+    """41. number of rings systems with 3 non-conjugated simple rings"""
+    simple_rings, _, ring_map = _get_ring_system(mol)
+    conj_rings_map = dict(
+        (i, _is_ring_fully_conjugated(mol, x)) for i, x in enumerate(simple_rings)
+    )
+    result = 0
+    for ring_set in ring_map:
+        n_not_conj = sum(not conj_rings_map[rnum] for rnum in ring_set)
+        result += n_not_conj == 3
+    return result
+
+
+
+ +
+ + +
+ + + + +

+ n_ring_system_with_conj_non_conj_simple_ring(mol) + +

+ + +
+ +

39. number of ring systems containing 2 simple rings, one conjugated and one nonconjugated

+ +
+ Source code in molfeat/calc/skeys.py +
472
+473
+474
+475
+476
+477
+478
+479
+480
+481
+482
+483
def n_ring_system_with_conj_non_conj_simple_ring(self, mol: dm.Mol):
+    """39 number of ring system containing 2 simple rings, one conjugated and one nonconjugated"""
+    simple_rings, _, ring_map = _get_ring_system(mol)
+    conj_rings_map = dict(
+        (i, _is_ring_fully_conjugated(mol, x)) for i, x in enumerate(simple_rings)
+    )
+    result = 0
+    for ring_set in ring_map:
+        if len(ring_set) == 2:
+            n_conj = sum(conj_rings_map[rnum] for rnum in ring_set)
+            result += n_conj == 1
+    return result
+
+
+
+ +
+ + +
+ + + + +

+ n_ring_system_with_greater_one_conj_nonconj_simple_ring(mol) + +

+ + +
+ +
    +
  1. number of ring systems containing 3 simple rings, at least one conjugated and one nonconjugated
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
509
+510
+511
+512
+513
+514
+515
+516
+517
+518
+519
+520
def n_ring_system_with_greater_one_conj_nonconj_simple_ring(self, mol: dm.Mol):
+    """42. number of ring system containing 3 simple rings, at least one conjugated and one nonconjugated"""
+    simple_rings, _, ring_map = _get_ring_system(mol)
+    conj_rings_map = dict(
+        (i, _is_ring_fully_conjugated(mol, x)) for i, x in enumerate(simple_rings)
+    )
+    result = 0
+    for ring_set in ring_map:
+        if len(ring_set) == 3:
+            n_conj = sum(conj_rings_map[rnum] for rnum in ring_set)
+            result += n_conj in [1, 2]
+    return result
+
+
+
+ +
+ + +
+ + + + +

+ n_simple_non_conj_5_atoms_rings(mol) + +

+ + +
+ +
    +
  1. number of simple non-conjugated rings with 5 atoms
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
425
+426
+427
+428
+429
+430
+431
+432
def n_simple_non_conj_5_atoms_rings(self, mol: dm.Mol):
+    """34. number of simple non-conjugated rings with 5 atoms"""
+    ri = mol.GetRingInfo()
+    n = 0
+    for ring in ri.AtomRings():
+        if not _is_ring_fully_conjugated(mol, ring) and len(ring) == 5:
+            n += 1
+    return n
+
+
+
+ +
+ + +
+ + + + +

+ n_simple_non_conj_6_atoms_rings(mol) + +

+ + +
+ +
    +
  1. number of simple non-conjugated rings with 6 atoms
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
434
+435
+436
+437
+438
+439
+440
+441
def n_simple_non_conj_6_atoms_rings(self, mol: dm.Mol):
+    """35. number of simple non-conjugated rings with 6 atoms"""
+    ri = mol.GetRingInfo()
+    n = 0
+    for ring in ri.AtomRings():
+        if not _is_ring_fully_conjugated(mol, ring) and len(ring) == 6:
+            n += 1
+    return n
+
+
+
+ +
+ + +
+ + + + +

+ n_simple_rings(mol) + +

+ + +
+ +
    +
  1. number of simple rings
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
390
+391
+392
+393
def n_simple_rings(self, mol: dm.Mol):
+    """28. number of simple rings"""
+    ri = mol.GetRingInfo()
+    return ri.NumRings()
+
+
+
+ +
+ + +
+ + + + +

+ n_simple_rings_1_heteroatoms(mol) + +

+ + +
+ +
    +
  1. number of simple rings with 1 heteroatom
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
407
+408
+409
+410
+411
def n_simple_rings_1_heteroatoms(self, mol: dm.Mol):
+    """31. number of simple rings with 1 heteroatom"""
+    ri = mol.GetRingInfo()
+    n_heteros = _count_heteroatom_per_ring(mol, ri.AtomRings())
+    return sum(1 for x in n_heteros if x == 1)
+
+
+
+ +
+ + +
+ + + + +

+ n_simple_rings_2_heteroatoms(mol) + +

+ + +
+ +
    +
  1. number of simple rings with 2 heteroatoms
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
413
+414
+415
+416
+417
def n_simple_rings_2_heteroatoms(self, mol: dm.Mol):
+    """32. number of simple rings with 2 heteroatom"""
+    ri = mol.GetRingInfo()
+    n_heteros = _count_heteroatom_per_ring(mol, ri.AtomRings())
+    return sum(1 for x in n_heteros if x == 2)
+
+
+
+ +
+ + +
+ + + + +

+ n_simple_rings_at_least_3_heteroatoms(mol) + +

+ + +
+ +
    +
  1. number of simple rings with 3 or more heteroatoms
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
419
+420
+421
+422
+423
def n_simple_rings_at_least_3_heteroatoms(self, mol: dm.Mol):
+    """33. number of simple rings with 3 or more heteroatoms"""
+    ri = mol.GetRingInfo()
+    n_heteros = _count_heteroatom_per_ring(mol, ri.AtomRings())
+    return sum(1 for x in n_heteros if x >= 3)
+
+
+
+ +
+ + +
+ + + + +

+ n_simple_rings_no_heteroatoms(mol) + +

+ + +
+ +
    +
  1. number of simple rings with no heteroatoms
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
401
+402
+403
+404
+405
def n_simple_rings_no_heteroatoms(self, mol: dm.Mol):
+    """30. number of simple rings with no heteroatoms"""
+    ri = mol.GetRingInfo()
+    n_heteros = _count_heteroatom_per_ring(mol, ri.AtomRings())
+    return sum(1 for x in n_heteros if x == 0)
+
+
+
+ +
+ + +
+ + + + +

+ n_sulfur(mol) + +

+ + +
+ +
    +
  1. number of sulfur atoms
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
250
+251
+252
+253
def n_sulfur(self, mol: dm.Mol):
+    """10. number of sulfur atoms"""
+    sm = dm.from_smarts("[#16]")
+    return len(mol.GetSubstructMatches(sm, uniquify=True))
+
+
+
+ +
+ + +
+ + + + +

+ size_largest_ring(mol) + +

+ + +
+ +
    +
  1. Size of the largest ring
  2. +
+ +
+ Source code in molfeat/calc/skeys.py +
395
+396
+397
+398
+399
def size_largest_ring(self, mol: dm.Mol):
+    """29. Size of the largest ring"""
+    ri = mol.GetRingInfo()
+    max_ring_size = max((len(r) for r in ri.AtomRings()), default=0)
+    return max_ring_size
+
+
+
+ +
+ + + +
+ +
+ +
+ + + +
+ + + + +

+ skdistance(sk1, sk2, weights=None, cdist=False) + +

+ + +
+ +

Compute the scaffold distance between two scaffold keys +as described in https://pubs.acs.org/doi/abs/10.1021/ci5001983. +The input features are expected to be normalized beforehand (see paper)

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
sk1 + ndarray + +
+

scaffold key 1

+
+
+ required +
sk2 + ndarray + +
+

scaffold key 2

+
+
+ required +
weights + Optional[ndarray] + +
+

how to weight each of the features. By default rank ordering is used.

+
+
+ None +
cdist + bool + +
+

whether to compute pairwise distances on batches of inputs (expected 2D)

+
+
+ False +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
dist + float + +
+

distance between two scaffold keys

+
+
+ +
+ Source code in molfeat/calc/skeys.py +
556
+557
+558
+559
+560
+561
+562
+563
+564
+565
+566
+567
+568
+569
+570
+571
+572
+573
+574
+575
+576
+577
+578
+579
+580
+581
+582
+583
+584
+585
+586
+587
+588
def skdistance(
+    sk1: np.ndarray,
+    sk2: np.ndarray,
+    weights: Optional[np.ndarray] = None,
+    cdist: bool = False,
+):
+    """Compute the scaffold distance between two scaffold keys
+    as described in https://pubs.acs.org/doi/abs/10.1021/ci5001983.
+    The input features are expected to be normalized beforehand (see paper)
+
+    Args:
+        sk1: scaffold key 1
+        sk2: scaffold key 2
+        weights: how to weight each of the features. By default rank ordering is used.
+        cdist: whether to compute the features on a batched of inputs (expected 2D)
+
+    Returns:
+        dist (float): distance between two scaffold keys
+    """
+    if weights is None:
+        weights = 1 / (np.arange(sk1.shape[-1]) + 1)
+
+    if cdist:
+        sk1 = np.atleast_2d(sk1)
+        sk2 = np.atleast_2d(sk2)
+        val = np.abs(sk1[:, None] - sk2[:]) ** 1.5
+        dist = np.sum(val * weights, axis=-1)
+    else:
+        if any((sk.ndim > 1 and sk.shape[0] != 1) for sk in [sk1, sk2]):
+            raise ValueError("`cdist` mode was not detected, you need to provide single vectors")
+        val = np.abs(sk1 - sk2) ** 1.5
+        dist = np.sum(val * weights)
+    return dist
+
+
+
+ +
+ + + +
+ +
+ +

+

Shape

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ ElectroShapeDescriptors + + +

+ + +
+

+ Bases: SerializableCalculator

+ + +

Compute Electroshape descriptors as described by

+

Armstrong et al. ElectroShape: fast molecular similarity calculations incorporating shape, chirality and electrostatics. +J Comput Aided Mol Des 24, 789-801 (2010). http://dx.doi.org/doi:10.1007/s10822-010-9374-0

+ +
+ Source code in molfeat/calc/shape.py +
 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
class ElectroShapeDescriptors(SerializableCalculator):
+    """Compute Electroshape descriptors as described by
+
+    Armstrong et al. ElectroShape: fast molecular similarity calculations incorporating shape, chirality and electrostatics.
+    J Comput Aided Mol Des 24, 789-801 (2010). http://dx.doi.org/doi:10.1007/s10822-010-9374-0
+    """
+
+    SUPPORTED_CHARGE_MODELS = ["gasteiger", "tripos", "mmff94", "formal"]
+
+    def __init__(
+        self,
+        charge_model: str = "gasteiger",
+        replace_nan: bool = False,
+        electron_scaling: float = 25.0,
+        **kwargs,
+    ):
+        """Constructor for ElectroShape descriptor
+
+        Args:
+            charge_model: charge model to use. One of ('gasteiger', 'tripos', 'mmff94', 'formal'). Defaults to "gasteiger".
+                Note that formal charges will be computed on the fly if not provided in the input molecules.
+                The `tripos` charge models comes from TRIPOS force field and are often parsed from mol2 files.
+            replace_nan: whether to replace NaN values. Defaults False
+            electron_scaling: scaling factor to convert electron charges to Angstroms. Defaults to 25.0.
+        """
+
+        self.charge_model = charge_model
+        self.replace_nan = replace_nan
+        self.electron_scaling = electron_scaling
+        self._columns = None
+
+    @property
+    def columns(self):
+        """
+        Get the name of all the descriptors of this calculator
+        """
+        if self._columns is None:
+            self._columns = []
+            for i in range(1, 6):
+                self._columns.extend([f"dist-{i}-mean", f"dist-{i}-std", f"dist-{i}-crb"])
+
+        return self._columns
+
+    def __getstate__(self):
+        state = {}
+        state["charge_model"] = self.charge_model
+        state["replace_nan"] = self.replace_nan
+        state["electron_scaling"] = self.electron_scaling
+        state["_columns"] = self._columns
+        return state
+
+    def __len__(self):
+        """Return the length of the calculator"""
+        return len(self.columns)
+
+    @staticmethod
+    def compute_charge(mol: Union[dm.Mol, str], charge_model: str = None):
+        """
+        Get the molecular charge of the molecule.
+
+        Args:
+            charge_model: charge model to use. One of ('gasteiger', 'tripos', 'mmff94', 'formal'). Defaults to "gasteiger".
+        """
+
+        if charge_model not in ElectroShapeDescriptors.SUPPORTED_CHARGE_MODELS:
+            raise ValueError(
+                f"Unknown charge model {charge_model}. You should provide one of {ElectroShapeDescriptors.SUPPORTED_CHARGE_MODELS}"
+            )
+        mol = dm.to_mol(mol)
+        atom_charge = []
+        atom_list = list(mol.GetAtoms())
+
+        # force compute the partial charges if not provided
+        if charge_model == "gasteiger" and not atom_list[0].HasProp("_GasteigerCharge"):
+            rdPartialCharges.ComputeGasteigerCharges(mol)
+        elif charge_model == "mmff94" and not atom_list[0].HasProp("_MMFF94Charge"):
+            ff_infos = rdForceFieldHelpers.MMFFGetMoleculeProperties(mol)
+            for i, atom in enumerate(atom_list):
+                atom.SetDoubleProp("_MMFF94Charge", ff_infos.GetMMFFPartialCharge(i))
+
+        for atom in mol.GetAtoms():
+            if charge_model == "formal":
+                atom_charge.append(atom.GetFormalCharge())
+            elif charge_model == "gasteiger":
+                atom_charge.append(atom.GetDoubleProp("_GasteigerCharge"))
+            elif charge_model == "mmff94":
+                atom_charge.append(atom.GetDoubleProp("_MMFF94Charge"))
+            elif charge_model == "tripos":
+                atom_charge.append(atom.GetDoubleProp("_TriposPartialCharge"))
+        return np.asarray(atom_charge)
+
+    @requires_conformer
+    def __call__(self, mol: Union[dm.Mol, str], conformer_id: Optional[int] = -1):
+        r"""
+        Get rdkit 3D descriptors for a molecule
+
+        Args:
+            mol: the molecule of interest
+            conformer_id (int, optional): Optional conformer id. Defaults to -1.
+
+        Returns:
+            shape_descriptor (np.ndarray): computed shape descriptor
+        """
+
+        mol = dm.to_mol(mol)
+        coords = mol.GetConformer(conformer_id).GetPositions()
+        charge = self.compute_charge(mol, self.charge_model)
+        if self.replace_nan:
+            charge = np.nan_to_num(charge)
+
+        desc_4d = np.column_stack((coords, charge * self.electron_scaling))
+
+        c1 = desc_4d.mean(axis=0)
+        distances_c1 = norm(desc_4d - c1, axis=1)
+
+        c2 = desc_4d[distances_c1.argmax()]  # atom position furthest from c1
+        distances_c2 = norm(desc_4d - c2, axis=1)
+
+        c3 = desc_4d[distances_c2.argmax()]  # atom position furthest from c2
+        distances_c3 = norm(desc_4d - c3, axis=1)
+
+        vector_a = c2 - c1
+        vector_b = c3 - c1
+        vector_as = vector_a[:3]  # spatial parts of these vectors
+        vector_bs = vector_b[:3]  # spatial parts of these vectors
+        cross_ab = np.cross(vector_as, vector_bs)
+        vector_c = (norm(vector_a) / (2 * norm(cross_ab))) * cross_ab
+        vector_c1s = c1[:3]
+
+        max_charge = np.array(np.amax(charge) * self.electron_scaling)
+        min_charge = np.array(np.amin(charge) * self.electron_scaling)
+
+        c4 = np.append(vector_c1s + vector_c, max_charge)
+        c5 = np.append(vector_c1s + vector_c, min_charge)
+
+        distances_c4 = norm(desc_4d - c4, axis=1)
+        distances_c5 = norm(desc_4d - c5, axis=1)
+
+        distances_list = [
+            distances_c1,
+            distances_c2,
+            distances_c3,
+            distances_c4,
+            distances_c5,
+        ]
+
+        shape_descriptor = np.zeros(15)
+
+        i = 0
+        for distances in distances_list:
+            mean = np.mean(distances)
+            shape_descriptor[0 + i] = mean
+            shape_descriptor[1 + i] = np.std(distances)
+            shape_descriptor[2 + i] = cbrt(np.sum(((distances - mean) ** 3) / distances.size))
+            i += 3
+        if self.replace_nan:
+            return np.nan_to_num(shape_descriptor)
+        return shape_descriptor
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ columns + + + property + + +

+ + +
+ +

Get the name of all the descriptors of this calculator

+
+ +
+ + + + +
+ + + + +

+ __call__(mol, conformer_id=-1) + +

+ + +
+ +

Compute the ElectroShape descriptors for a molecule

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Union[Mol, str] + +
+

the molecule of interest

+
+
+ required +
conformer_id + int + +
+

Optional conformer id. Defaults to -1.

+
+
+ -1 +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
shape_descriptor + ndarray + +
+

computed shape descriptor

+
+
+ +
+ Source code in molfeat/calc/shape.py +
170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
@requires_conformer
+def __call__(self, mol: Union[dm.Mol, str], conformer_id: Optional[int] = -1):
+    r"""
+    Get rdkit 3D descriptors for a molecule
+
+    Args:
+        mol: the molecule of interest
+        conformer_id (int, optional): Optional conformer id. Defaults to -1.
+
+    Returns:
+        shape_descriptor (np.ndarray): computed shape descriptor
+    """
+
+    mol = dm.to_mol(mol)
+    coords = mol.GetConformer(conformer_id).GetPositions()
+    charge = self.compute_charge(mol, self.charge_model)
+    if self.replace_nan:
+        charge = np.nan_to_num(charge)
+
+    desc_4d = np.column_stack((coords, charge * self.electron_scaling))
+
+    c1 = desc_4d.mean(axis=0)
+    distances_c1 = norm(desc_4d - c1, axis=1)
+
+    c2 = desc_4d[distances_c1.argmax()]  # atom position furthest from c1
+    distances_c2 = norm(desc_4d - c2, axis=1)
+
+    c3 = desc_4d[distances_c2.argmax()]  # atom position furthest from c2
+    distances_c3 = norm(desc_4d - c3, axis=1)
+
+    vector_a = c2 - c1
+    vector_b = c3 - c1
+    vector_as = vector_a[:3]  # spatial parts of these vectors
+    vector_bs = vector_b[:3]  # spatial parts of these vectors
+    cross_ab = np.cross(vector_as, vector_bs)
+    vector_c = (norm(vector_a) / (2 * norm(cross_ab))) * cross_ab
+    vector_c1s = c1[:3]
+
+    max_charge = np.array(np.amax(charge) * self.electron_scaling)
+    min_charge = np.array(np.amin(charge) * self.electron_scaling)
+
+    c4 = np.append(vector_c1s + vector_c, max_charge)
+    c5 = np.append(vector_c1s + vector_c, min_charge)
+
+    distances_c4 = norm(desc_4d - c4, axis=1)
+    distances_c5 = norm(desc_4d - c5, axis=1)
+
+    distances_list = [
+        distances_c1,
+        distances_c2,
+        distances_c3,
+        distances_c4,
+        distances_c5,
+    ]
+
+    shape_descriptor = np.zeros(15)
+
+    i = 0
+    for distances in distances_list:
+        mean = np.mean(distances)
+        shape_descriptor[0 + i] = mean
+        shape_descriptor[1 + i] = np.std(distances)
+        shape_descriptor[2 + i] = cbrt(np.sum(((distances - mean) ** 3) / distances.size))
+        i += 3
+    if self.replace_nan:
+        return np.nan_to_num(shape_descriptor)
+    return shape_descriptor
+
+
+
+ +
+ + +
+ + + + +

+ __init__(charge_model='gasteiger', replace_nan=False, electron_scaling=25.0, **kwargs) + +

+ + +
+ +

Constructor for ElectroShape descriptor

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
charge_model + str + +
+

charge model to use. One of ('gasteiger', 'tripos', 'mmff94', 'formal'). Defaults to "gasteiger". +Note that gasteiger and mmff94 partial charges will be computed on the fly if not provided in the input molecules. +The tripos charge model comes from the TRIPOS force field and is often parsed from mol2 files.

+
+
+ 'gasteiger' +
replace_nan + bool + +
+

whether to replace NaN values. Defaults to False.

+
+
+ False +
electron_scaling + float + +
+

scaling factor to convert electron charges to Angstroms. Defaults to 25.0.

+
+
+ 25.0 +
+ +
+ Source code in molfeat/calc/shape.py +
 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
def __init__(
+    self,
+    charge_model: str = "gasteiger",
+    replace_nan: bool = False,
+    electron_scaling: float = 25.0,
+    **kwargs,
+):
+    """Constructor for ElectroShape descriptor
+
+    Args:
+        charge_model: charge model to use. One of ('gasteiger', 'tripos', 'mmff94', 'formal'). Defaults to "gasteiger".
+            Note that formal charges will be computed on the fly if not provided in the input molecules.
+            The `tripos` charge models comes from TRIPOS force field and are often parsed from mol2 files.
+        replace_nan: whether to replace NaN values. Defaults False
+        electron_scaling: scaling factor to convert electron charges to Angstroms. Defaults to 25.0.
+    """
+
+    self.charge_model = charge_model
+    self.replace_nan = replace_nan
+    self.electron_scaling = electron_scaling
+    self._columns = None
+
+
+
+ +
+ + +
+ + + + +

+ __len__() + +

+ + +
+ +

Return the length of the calculator

+ +
+ Source code in molfeat/calc/shape.py +
130
+131
+132
def __len__(self):
+    """Return the length of the calculator"""
+    return len(self.columns)
+
+
+
+ +
+ + +
+ + + + +

+ compute_charge(mol, charge_model=None) + + + staticmethod + + +

+ + +
+ +

Get the per-atom charges of the molecule.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
charge_model + str + +
+

charge model to use. One of ('gasteiger', 'tripos', 'mmff94', 'formal'). Must be provided explicitly: the default of None is not a supported model and raises a ValueError.

+
+
+ None +
+ +
+ Source code in molfeat/calc/shape.py +
134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
@staticmethod
+def compute_charge(mol: Union[dm.Mol, str], charge_model: str = None):
+    """
+    Get the molecular charge of the molecule.
+
+    Args:
+        charge_model: charge model to use. One of ('gasteiger', 'tripos', 'mmff94', 'formal'). Defaults to "gasteiger".
+    """
+
+    if charge_model not in ElectroShapeDescriptors.SUPPORTED_CHARGE_MODELS:
+        raise ValueError(
+            f"Unknown charge model {charge_model}. You should provide one of {ElectroShapeDescriptors.SUPPORTED_CHARGE_MODELS}"
+        )
+    mol = dm.to_mol(mol)
+    atom_charge = []
+    atom_list = list(mol.GetAtoms())
+
+    # force compute the partial charges if not provided
+    if charge_model == "gasteiger" and not atom_list[0].HasProp("_GasteigerCharge"):
+        rdPartialCharges.ComputeGasteigerCharges(mol)
+    elif charge_model == "mmff94" and not atom_list[0].HasProp("_MMFF94Charge"):
+        ff_infos = rdForceFieldHelpers.MMFFGetMoleculeProperties(mol)
+        for i, atom in enumerate(atom_list):
+            atom.SetDoubleProp("_MMFF94Charge", ff_infos.GetMMFFPartialCharge(i))
+
+    for atom in mol.GetAtoms():
+        if charge_model == "formal":
+            atom_charge.append(atom.GetFormalCharge())
+        elif charge_model == "gasteiger":
+            atom_charge.append(atom.GetDoubleProp("_GasteigerCharge"))
+        elif charge_model == "mmff94":
+            atom_charge.append(atom.GetDoubleProp("_MMFF94Charge"))
+        elif charge_model == "tripos":
+            atom_charge.append(atom.GetDoubleProp("_TriposPartialCharge"))
+    return np.asarray(atom_charge)
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ USRDescriptors + + +

+ + +
+

+ Bases: SerializableCalculator

+ + +

Descriptors for the shape of a molecule.

+

Note: the following shape descriptors are offered: + * USR: Ultrafast Shape Recognition + * USRCAT: Ultrafast Shape Recognition with CREDO Atom Types

+ +
+ Source code in molfeat/calc/shape.py +
13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
class USRDescriptors(SerializableCalculator):
+    """Descriptors for the shape of a molecule.
+
+    !!! note:
+        The following shape descriptors are offered:
+            * USR: UltraFast Shape Recognition
+            * USRCAT: Ultrafast Shape Recognition with CREDO Atom Types
+    """
+
+    def __init__(self, method: str = "USR", replace_nan: bool = False, **kwargs):
+        """Constructor for ShapeDescriptors
+
+        Args:
+            method: Shape descriptor method to use. One of 'USR', 'USRCAT'. Default to 'USR'
+            replace_nan: Whether to replace nan or infinite values. Defaults to False.
+        """
+        self.method = method.upper()
+        if self.method not in ["USR", "USRCAT"]:
+            raise ValueError(f"Shape descriptor {self.method} is not supported")
+        self.replace_nan = replace_nan
+        self._columns = None
+
+    def __getstate__(self):
+        state = {}
+        state["method"] = self.method
+        state["replace_nan"] = self.replace_nan
+        state["_columns"] = self._columns
+        return state
+
+    @property
+    def columns(self):
+        """
+        Get the name of all the descriptors of this calculator
+        """
+        if self._columns is None:
+            if self.method == "USR":
+                self._columns = [f"usr-{i}" for i in range(1, 13)]
+            elif self.method == "USRCAT":
+                self._columns = [f"usr-{i}" for i in range(1, 61)]
+        return self._columns
+
+    def __len__(self):
+        """Compute descriptors length"""
+        return len(self.columns)
+
+    @requires_conformer
+    def __call__(self, mol: Union[dm.Mol, str], conformer_id: Optional[int] = -1) -> np.ndarray:
+        r"""
+        Get rdkit 3D descriptors for a molecule
+
+        Args:
+            mol: the molecule of interest
+            conformer_id: Optional conformer id. Defaults to -1.
+
+        Returns:
+            shape_descriptors: list of computed molecular descriptors
+        """
+        if self.method == "USR":
+            shape_descr = rdMolDescriptors.GetUSR(mol, confId=conformer_id)
+        elif self.method == "USRCAT":
+            shape_descr = rdMolDescriptors.GetUSRCAT(mol, confId=conformer_id)
+        if self.replace_nan:
+            shape_descr = np.nan_to_num(shape_descr, self.replace_nan)
+        return np.asarray(shape_descr)
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ columns + + + property + + +

+ + +
+ +

Get the name of all the descriptors of this calculator

+
+ +
+ + + + +
+ + + + +

+ __call__(mol, conformer_id=-1) + +

+ + +
+ +

Compute the USR or USRCAT shape descriptors of a molecule from one of its 3D conformers

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Union[Mol, str] + +
+

the molecule of interest

+
+
+ required +
conformer_id + Optional[int] + +
+

Optional conformer id. Defaults to -1.

+
+
+ -1 +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
shape_descriptors + ndarray + +
+

array of computed shape descriptors

+
+
+ +
+ Source code in molfeat/calc/shape.py +
58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
@requires_conformer
+def __call__(self, mol: Union[dm.Mol, str], conformer_id: Optional[int] = -1) -> np.ndarray:
+    r"""
+    Get rdkit 3D descriptors for a molecule
+
+    Args:
+        mol: the molecule of interest
+        conformer_id: Optional conformer id. Defaults to -1.
+
+    Returns:
+        shape_descriptors: list of computed molecular descriptors
+    """
+    if self.method == "USR":
+        shape_descr = rdMolDescriptors.GetUSR(mol, confId=conformer_id)
+    elif self.method == "USRCAT":
+        shape_descr = rdMolDescriptors.GetUSRCAT(mol, confId=conformer_id)
+    if self.replace_nan:
+        shape_descr = np.nan_to_num(shape_descr, self.replace_nan)
+    return np.asarray(shape_descr)
+
+
+
+ +
+ + +
+ + + + +

+ __init__(method='USR', replace_nan=False, **kwargs) + +

+ + +
+ +

Constructor for USRDescriptors

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
method + str + +
+

Shape descriptor method to use. One of 'USR', 'USRCAT' (case-insensitive). Defaults to 'USR'.

+
+
+ 'USR' +
replace_nan + bool + +
+

Whether to replace nan or infinite values. Defaults to False.

+
+
+ False +
+ +
+ Source code in molfeat/calc/shape.py +
22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
def __init__(self, method: str = "USR", replace_nan: bool = False, **kwargs):
+    """Constructor for ShapeDescriptors
+
+    Args:
+        method: Shape descriptor method to use. One of 'USR', 'USRCAT'. Default to 'USR'
+        replace_nan: Whether to replace nan or infinite values. Defaults to False.
+    """
+    self.method = method.upper()
+    if self.method not in ["USR", "USRCAT"]:
+        raise ValueError(f"Shape descriptor {self.method} is not supported")
+    self.replace_nan = replace_nan
+    self._columns = None
+
+
+
+ +
+ + +
+ + + + +

+ __len__() + +

+ + +
+ +

Compute descriptors length

+ +
+ Source code in molfeat/calc/shape.py +
54
+55
+56
def __len__(self):
+    """Compute descriptors length"""
+    return len(self.columns)
+
+
+
+ +
+ + + +
+ +
+ +
+ + + +
+ + + + +

+ usrdistance(shape_1, shape_2, weights=None) + +

+ + +
+ +

Computes the USR similarity score between the shape descriptors of two molecules

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
shape_1 + +
+

USR shape descriptor of first molecule

+
+
+ required +
shape_2 + +
+

USR shape descriptor of second molecule

+
+
+ required +
weights + Optional[List[float]] + +
+

List of scaling factors passed to RDKit's GetUSRScore. An empty list is used when None.

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
dist + +
+

Similarity score in (0, 1] between the shapes of the molecules, as returned by RDKit's GetUSRScore; 1 indicates identical shape descriptors

+
+
+ +
+ Source code in molfeat/calc/shape.py +
239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
def usrdistance(
+    shape_1,
+    shape_2,
+    weights: Optional[List[float]] = None,
+):
+    """Computes similarity between molecules
+
+    Args:
+        shape_1: USR shape descriptor of first molecule
+        shape_2: USR shape descriptor
+        weights: List of scaling factor to use for
+
+    Returns:
+        dist: Distance [0-1] between shapes of molecules, 0 indicates identical molecules
+    """
+
+    # case for usr shape descriptors
+    if weights is None:
+        weights = []
+    if (
+        (shape_1.shape[-1] == shape_2.shape[-1] == 12)
+        or (shape_1.shape[-1] == shape_2.shape[-1] == 60)
+        or (shape_1.shape[-1] == shape_2.shape[-1] == 15)
+    ):
+        dist = rdMolDescriptors.GetUSRScore(shape_1, shape_2, weights=weights)
+        return dist
+
+    raise Exception(
+        "Given vectors are not valid USR shape descriptors "
+        "or come from different methods. Correct vector lengths"
+        "are: 12 for USR, 60 for USRCAT, 15 for Electroshape"
+    )
+
+
+
+ +
+ + + +
+ +
+ +

+

Atoms Featurizer

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ AtomCalculator + + +

+ + +
+

+ Bases: SerializableCalculator

+ + +

Base class for computing atom properties compatible with DGLLife

+ +
+ Source code in molfeat/calc/atom.py +
 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
class AtomCalculator(SerializableCalculator):
+    """
+    Base class for computing atom properties compatible with DGLLife
+    """
+
+    DEFAULT_FEATURIZER = {
+        "atom_one_hot": atom_one_hot,
+        "atom_degree_one_hot": atom_degree_one_hot,
+        "atom_implicit_valence_one_hot": atom_implicit_valence_one_hot,
+        "atom_hybridization_one_hot": atom_hybridization_one_hot,
+        "atom_is_aromatic": atom_is_aromatic,
+        "atom_formal_charge": atom_formal_charge,
+        "atom_num_radical_electrons": atom_num_radical_electrons,
+        "atom_is_in_ring": atom_is_in_ring,
+        "atom_total_num_H_one_hot": atom_total_num_H_one_hot,
+        "atom_chiral_tag_one_hot": atom_chiral_tag_one_hot,
+        "atom_is_chiral_center": atom_is_chiral_center,
+    }
+
+    def __init__(
+        self,
+        featurizer_funcs: Dict[str, Callable] = None,
+        concat: bool = True,
+        name: str = "hv",
+    ):
+        """
+        Init function of the atom property calculator
+
+        Args:
+            featurizer_funcs : Mapping of feature name to the featurization function.
+                For compatibility a list of callable/function is still accepted, and the corresponding
+                featurizer name will be automatically generated. Each function is of signature
+                ``func(dm.Atom) -> list or 1D numpy array``.
+            concat: Whether to concat all the data into a single value in the output dict
+            name: Name of the key name of the concatenated features
+        """
+        self._input_kwargs = locals().copy()
+        self._input_kwargs.pop("self")
+        # we also remove the featurizer funcs
+        self._input_kwargs.pop("featurizer_funcs", None)
+        self._toy_mol = dm.to_mol("CCO")
+        self._feat_sizes = dict()
+        if featurizer_funcs is None:
+            featurizer_funcs = self.DEFAULT_FEATURIZER
+        if not isinstance(featurizer_funcs, dict):
+            get_name = lambda x: getattr(x, "__name__", repr(x))
+            featurizer_funcs = dict((get_name(x), x) for x in featurizer_funcs)
+        self.featurizer_funcs = featurizer_funcs
+        for k in self.featurizer_funcs.keys():
+            self.feat_size(feat_name=k)
+        self.concat = concat
+        self.name = name
+
+    def to_state_dict(self):
+        """
+        Convert the Atom calculator to a state dict
+        Due to some constraints and cross-version compatibility,  the featurizer functions
+        need to be pickled and not just return a list
+        """
+        state_dict = {}
+        state_dict["name"] = self.__class__.__name__
+        state_dict["module"] = self.__class__.__module__
+        state_dict["args"] = self._input_kwargs
+        featurizer_fn_pickled = {}
+        for fname, ffunc in self.featurizer_funcs.items():
+            featurizer_fn_pickled[fname] = fn_to_hex(ffunc)
+        state_dict["args"]["featurizer_funcs"] = featurizer_fn_pickled
+        state_dict["_molfeat_version"] = MOLFEAT_VERSION
+
+        signature = inspect.signature(self.__init__)
+        val = {
+            k: v.default
+            for k, v in signature.parameters.items()
+            # if v.default is not inspect.Parameter.empty
+        }
+        to_remove = [k for k in state_dict["args"] if k not in val.keys()]
+        for k in to_remove:
+            state_dict["args"].pop(k)
+
+        return state_dict
+
+    @classmethod
+    def from_state_dict(cls, state_dict, override_args: Optional[dict] = None):
+        """Create an instance of an atom calculator from a state dict
+
+        Args:
+            state_dict: state dictionary to use to create the atom calculator
+            override_args: optional dictionary of arguments to override the ones in the state dict
+                at construction of the new object
+        """
+        # EN: at this moment, version compatibility is not enforced
+        cls_name = state_dict.get("name", cls.__name__)
+        module_name = state_dict.get("module", cls.__module__)
+        module = importlib.import_module(module_name)
+        klass = getattr(module, cls_name)
+        kwargs = state_dict["args"].copy()
+        # now we need to unpickle the featurizer functions
+        featurizer_fn_pickled = kwargs.pop("featurizer_funcs", None)
+        if featurizer_fn_pickled is not None:
+            featurizer_fn_loaded = {}
+            for k, v in featurizer_fn_pickled.items():
+                featurizer_fn_loaded[k] = hex_to_fn(v)
+            kwargs["featurizer_funcs"] = featurizer_fn_loaded
+        kwargs.update(**(override_args or {}))
+        return klass(**kwargs)
+
+    def _concat(self, data_dict: Dict[str, Iterable]):
+        """Concatenate the data into a single value
+
+        Args:
+            data_dict: mapping of feature names to tensor/arrays
+        Returns:
+            concatenated_dict: a dict with a single key where all array have been concatenated
+        """
+        return concat_dict(data_dict, new_name=self.name)
+
+    def feat_size(self, feat_name=None):
+        """Get the feature size for ``feat_name``.
+
+        When there is only one feature, users do not need to provide ``feat_name``.
+
+        Args:
+            feat_name: Feature for query.
+
+        Returns:
+            int: Feature size for the feature with name ``feat_name``. Default to None.
+        """
+        if feat_name is None:
+            assert (
+                len(self.featurizer_funcs) == 1
+            ), "feat_name should be provided if there are more than one features"
+            feat_name = list(self.featurizer_funcs.keys())[0]
+
+        if feat_name not in self.featurizer_funcs:
+            raise ValueError(
+                "Expect feat_name to be in {}, got {}".format(
+                    list(self.featurizer_funcs.keys()), feat_name
+                )
+            )
+
+        if feat_name not in self._feat_sizes:
+            atom = self._toy_mol.GetAtomWithIdx(0)
+            self._feat_sizes[feat_name] = len(self.featurizer_funcs[feat_name](atom))
+        return self._feat_sizes[feat_name]
+
+    def __len__(self):
+        """Get length of the property estimator"""
+        return sum(v for k, v in self._feat_sizes.items() if k != self.name)
+
+    def __call__(self, mol: Union[dm.Mol, str], dtype: Callable = None):
+        """
+        Get rdkit basic descriptors for a molecule
+
+        Args:
+            mol: the molecule of interest
+            dtype: requested data type
+
+        Returns:
+            dict:  For each function in self.featurizer_funcs with the key ``k``, store the computed feature under the key ``k``.
+        """
+        mol = dm.to_mol(mol)
+        num_atoms = mol.GetNumAtoms()
+        atom_features = defaultdict(list)
+
+        # Compute features for each atom
+        for i in range(num_atoms):
+            atom = mol.GetAtomWithIdx(i)
+            for feat_name, feat_func in self.featurizer_funcs.items():
+                atom_features[feat_name].append(feat_func(atom))
+
+        # Stack the features and convert them to float arrays
+        processed_features = dict()
+        for feat_name, feat_list in atom_features.items():
+            feat = np.stack(feat_list).astype(np.float32)
+            processed_features[feat_name] = feat
+
+        if self.concat:
+            processed_features = self._concat(processed_features)
+
+        if dtype is not None:
+            for feat_name, feat in processed_features.items():
+                feat = datatype.cast(feat, dtype=dtype)
+                processed_features[feat_name] = feat
+
+        return processed_features
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __call__(mol, dtype=None) + +

+ + +
+ +

Compute the atom-level features of a molecule

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Union[Mol, str] + +
+

the molecule of interest

+
+
+ required +
dtype + Callable + +
+

requested data type

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
dict + +
+

For each function in self.featurizer_funcs with the key k, store the computed feature under the key k.

+
+
+ +
+ Source code in molfeat/calc/atom.py +
195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
def __call__(self, mol: Union[dm.Mol, str], dtype: Callable = None):
+    """
+    Get rdkit basic descriptors for a molecule
+
+    Args:
+        mol: the molecule of interest
+        dtype: requested data type
+
+    Returns:
+        dict:  For each function in self.featurizer_funcs with the key ``k``, store the computed feature under the key ``k``.
+    """
+    mol = dm.to_mol(mol)
+    num_atoms = mol.GetNumAtoms()
+    atom_features = defaultdict(list)
+
+    # Compute features for each atom
+    for i in range(num_atoms):
+        atom = mol.GetAtomWithIdx(i)
+        for feat_name, feat_func in self.featurizer_funcs.items():
+            atom_features[feat_name].append(feat_func(atom))
+
+    # Stack the features and convert them to float arrays
+    processed_features = dict()
+    for feat_name, feat_list in atom_features.items():
+        feat = np.stack(feat_list).astype(np.float32)
+        processed_features[feat_name] = feat
+
+    if self.concat:
+        processed_features = self._concat(processed_features)
+
+    if dtype is not None:
+        for feat_name, feat in processed_features.items():
+            feat = datatype.cast(feat, dtype=dtype)
+            processed_features[feat_name] = feat
+
+    return processed_features
+
+
+
+ +
+ + +
+ + + + +

+ __init__(featurizer_funcs=None, concat=True, name='hv') + +

+ + +
+ +

Init function of the atom property calculator

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
featurizer_funcs + +
+

Mapping of feature name to the featurization function. +For compatibility, a list of callables/functions is still accepted, and the corresponding +featurizer names will be automatically generated. Each function has the signature +func(dm.Atom) -> list or 1D numpy array.

+
+
+ None +
concat + bool + +
+

Whether to concat all the data into a single value in the output dict

+
+
+ True +
name + str + +
+

Name of the key under which the concatenated features are stored

+
+
+ 'hv' +
+ +
+ Source code in molfeat/calc/atom.py +
65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
+88
+89
+90
+91
+92
+93
+94
+95
+96
+97
def __init__(
+    self,
+    featurizer_funcs: Dict[str, Callable] = None,
+    concat: bool = True,
+    name: str = "hv",
+):
+    """
+    Init function of the atom property calculator
+
+    Args:
+        featurizer_funcs : Mapping of feature name to the featurization function.
+            For compatibility a list of callable/function is still accepted, and the corresponding
+            featurizer name will be automatically generated. Each function is of signature
+            ``func(dm.Atom) -> list or 1D numpy array``.
+        concat: Whether to concat all the data into a single value in the output dict
+        name: Name of the key name of the concatenated features
+    """
+    self._input_kwargs = locals().copy()
+    self._input_kwargs.pop("self")
+    # we also remove the featurizer funcs
+    self._input_kwargs.pop("featurizer_funcs", None)
+    self._toy_mol = dm.to_mol("CCO")
+    self._feat_sizes = dict()
+    if featurizer_funcs is None:
+        featurizer_funcs = self.DEFAULT_FEATURIZER
+    if not isinstance(featurizer_funcs, dict):
+        get_name = lambda x: getattr(x, "__name__", repr(x))
+        featurizer_funcs = dict((get_name(x), x) for x in featurizer_funcs)
+    self.featurizer_funcs = featurizer_funcs
+    for k in self.featurizer_funcs.keys():
+        self.feat_size(feat_name=k)
+    self.concat = concat
+    self.name = name
+
+
+
+ +
+ + +
+ + + + +

+ __len__() + +

+ + +
+ +

Get length of the property estimator

+ +
+ Source code in molfeat/calc/atom.py +
191
+192
+193
def __len__(self):
+    """Get length of the property estimator"""
+    return sum(v for k, v in self._feat_sizes.items() if k != self.name)
+
+
+
+ +
+ + +
+ + + + +

+ feat_size(feat_name=None) + +

+ + +
+ +

Get the feature size for feat_name.

+

When there is only one feature, users do not need to provide feat_name.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
feat_name + +
+

Name of the feature to query. Defaults to None.

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
int + +
+

Feature size for the feature with name feat_name.

+
+
+ +
+ Source code in molfeat/calc/atom.py +
162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
def feat_size(self, feat_name=None):
+    """Get the feature size for ``feat_name``.
+
+    When there is only one feature, users do not need to provide ``feat_name``.
+
+    Args:
+        feat_name: Feature for query.
+
+    Returns:
+        int: Feature size for the feature with name ``feat_name``. Default to None.
+    """
+    if feat_name is None:
+        assert (
+            len(self.featurizer_funcs) == 1
+        ), "feat_name should be provided if there are more than one features"
+        feat_name = list(self.featurizer_funcs.keys())[0]
+
+    if feat_name not in self.featurizer_funcs:
+        raise ValueError(
+            "Expect feat_name to be in {}, got {}".format(
+                list(self.featurizer_funcs.keys()), feat_name
+            )
+        )
+
+    if feat_name not in self._feat_sizes:
+        atom = self._toy_mol.GetAtomWithIdx(0)
+        self._feat_sizes[feat_name] = len(self.featurizer_funcs[feat_name](atom))
+    return self._feat_sizes[feat_name]
+
+
+
+ +
+ + +
+ + + + +

+ from_state_dict(state_dict, override_args=None) + + + classmethod + + +

+ + +
+ +

Create an instance of an atom calculator from a state dict

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
state_dict + +
+

state dictionary to use to create the atom calculator

+
+
+ required +
override_args + Optional[dict] + +
+

optional dictionary of arguments to override the ones in the state dict +at construction of the new object

+
+
+ None +
+ +
+ Source code in molfeat/calc/atom.py +
127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
@classmethod
+def from_state_dict(cls, state_dict, override_args: Optional[dict] = None):
+    """Create an instance of an atom calculator from a state dict
+
+    Args:
+        state_dict: state dictionary to use to create the atom calculator
+        override_args: optional dictionary of arguments to override the ones in the state dict
+            at construction of the new object
+    """
+    # EN: at this moment, version compatibility is not enforced
+    cls_name = state_dict.get("name", cls.__name__)
+    module_name = state_dict.get("module", cls.__module__)
+    module = importlib.import_module(module_name)
+    klass = getattr(module, cls_name)
+    kwargs = state_dict["args"].copy()
+    # now we need to unpickle the featurizer functions
+    featurizer_fn_pickled = kwargs.pop("featurizer_funcs", None)
+    if featurizer_fn_pickled is not None:
+        featurizer_fn_loaded = {}
+        for k, v in featurizer_fn_pickled.items():
+            featurizer_fn_loaded[k] = hex_to_fn(v)
+        kwargs["featurizer_funcs"] = featurizer_fn_loaded
+    kwargs.update(**(override_args or {}))
+    return klass(**kwargs)
+
+
+
+ +
+ + +
+ + + + +

+ to_state_dict() + +

+ + +
+ +

Convert the atom calculator to a state dict. +Due to some constraints and for cross-version compatibility, the featurizer functions +need to be pickled rather than just returned as a list.

+ +
+ Source code in molfeat/calc/atom.py +
 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
def to_state_dict(self):
+    """
+    Convert the Atom calculator to a state dict
+    Due to some constraints and cross-version compatibility,  the featurizer functions
+    need to be pickled and not just return a list
+    """
+    state_dict = {}
+    state_dict["name"] = self.__class__.__name__
+    state_dict["module"] = self.__class__.__module__
+    state_dict["args"] = self._input_kwargs
+    featurizer_fn_pickled = {}
+    for fname, ffunc in self.featurizer_funcs.items():
+        featurizer_fn_pickled[fname] = fn_to_hex(ffunc)
+    state_dict["args"]["featurizer_funcs"] = featurizer_fn_pickled
+    state_dict["_molfeat_version"] = MOLFEAT_VERSION
+
+    signature = inspect.signature(self.__init__)
+    val = {
+        k: v.default
+        for k, v in signature.parameters.items()
+        # if v.default is not inspect.Parameter.empty
+    }
+    to_remove = [k for k in state_dict["args"] if k not in val.keys()]
+    for k in to_remove:
+        state_dict["args"].pop(k)
+
+    return state_dict
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ AtomMaterialCalculator + + +

+ + +
+

+ Bases: AtomCalculator

+ + +

Atom calculator with the extended atomic property list, +which has been collected from various materials science packages

+ +
+ Source code in molfeat/calc/atom.py +
233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
class AtomMaterialCalculator(AtomCalculator):
+    """Atom calculator with the extend atomic property list
+    which have been collected from various material science packages
+    """
+
+    DEFAULT_FEATURIZER = {
+        "atom_one_hot": atom_one_hot,
+        "atom_extended_properties": atom_extended_properties,
+        "atom_degree_one_hot": atom_degree_one_hot,
+        "atom_implicit_valence_one_hot": atom_implicit_valence_one_hot,
+        "atom_hybridization_one_hot": atom_hybridization_one_hot,
+        "atom_is_aromatic": atom_is_aromatic,
+        "atom_formal_charge": atom_formal_charge,
+        "atom_num_radical_electrons": atom_num_radical_electrons,
+        "atom_is_in_ring": atom_is_in_ring,
+        "atom_chiral_tag_one_hot": atom_chiral_tag_one_hot,
+        "atom_is_chiral_center": atom_is_chiral_center,
+    }
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + + + +

+ DGLCanonicalAtomCalculator + + +

+ + +
+

+ Bases: AtomCalculator

+ + +

Default canonical featurizer for atoms used by dgllife

+ +
+ Source code in molfeat/calc/atom.py +
253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
class DGLCanonicalAtomCalculator(AtomCalculator):
+    """Default canonical featurizer for atoms used by dgllife"""
+
+    DEFAULT_FEATURIZER = {
+        "atom_one_hot": atom_one_hot,
+        "atom_degree_one_hot": atom_degree_one_hot,
+        "atom_implicit_valence_one_hot": atom_implicit_valence_one_hot,
+        "atom_formal_charge": atom_formal_charge,
+        "atom_num_radical_electrons": atom_num_radical_electrons,
+        "atom_hybridization_one_hot": partial(
+            atom_hybridization_one_hot, allowable_set=DGLLIFE_HYBRIDIZATION_LIST
+        ),
+        "atom_is_aromatic": atom_is_aromatic,
+        "atom_total_num_H_one_hot": atom_total_num_H_one_hot,
+    }
+
+    def _concat(self, data_dict: Dict[str, Iterable]):
+        """Concatenate the data into a single value
+
+        Args:
+            data_dict: mapping of feature names to tensor/arrays
+        Returns:
+            concatenated_dict: a dict with a single key where all array have been concatenated
+        """
+        out = concat_dict(data_dict, new_name=self.name, order=list(self.featurizer_funcs.keys()))
+        return out
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + + + +

+ DGLWeaveAtomCalculator + + +

+ + +
+

+ Bases: DGLCanonicalAtomCalculator

+ + +

Default atom featurizer used by WeaveNet in DGLLife

+ +
+ Source code in molfeat/calc/atom.py +
281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
class DGLWeaveAtomCalculator(DGLCanonicalAtomCalculator):
+    """Default atom featurizer used by WeaveNet in DGLLife"""
+
+    DEFAULT_FEATURIZER = {
+        "atom_one_hot": partial(
+            atom_one_hot, allowable_set=DGLLIFE_WEAVE_ATOMS, encode_unknown=True
+        ),
+        "atom_chiral_tag_one_hot": partial(
+            atom_chiral_tag_one_hot, allowable_set=DGLLIFE_WEAVE_CHIRAL_TYPES
+        ),
+        "atom_formal_charge": atom_formal_charge,
+        "atom_partial_charge": atom_partial_charge,
+        "atom_is_aromatic": atom_is_aromatic,
+        "atom_hybridization_one_hot": partial(
+            atom_hybridization_one_hot, allowable_set=DGLLIFE_HYBRIDIZATION_LIST[:3]
+        ),
+    }
+
+    def __init__(self, concat: bool = True, name: str = "hv"):
+        featurizer_funcs = self.DEFAULT_FEATURIZER
+        featurizer_funcs["atom_weavenet_props"] = self.atom_weave_props
+        super().__init__(concat=concat, name=name, featurizer_funcs=featurizer_funcs)
+
+    def _get_atom_state_info(self, feats):
+        """Get atom Donor/Acceptor state information from chemical pharmacophore features
+
+        Args:
+            feats: computed chemical features
+        """
+        is_donor = defaultdict(bool)
+        is_acceptor = defaultdict(bool)
+        # Get hydrogen bond donor/acceptor information
+        for feats in feats:
+            if feats.GetFamily() == "Donor":
+                nodes = feats.GetAtomIds()
+                for u in nodes:
+                    is_donor[u] = True
+            elif feats.GetFamily() == "Acceptor":
+                nodes = feats.GetAtomIds()
+                for u in nodes:
+                    is_acceptor[u] = True
+        return is_donor, is_acceptor
+
+    @staticmethod
+    @lru_cache(maxsize=None)
+    def _feat_factory_cache():
+        """Build and cache chemical features caching for speed"""
+        fdef_name = os.path.join(RDConfig.RDDataDir, "BaseFeatures.fdef")
+        chem_feats = ChemicalFeatures.BuildFeatureFactory(fdef_name)
+        return chem_feats
+
+    @lru_cache
+    def _compute_weave_net_properties(self, mol: dm.Mol):
+        # Get information for donor and acceptor
+        chem_feats = self._feat_factory_cache()
+        mol_feats = chem_feats.GetFeaturesForMol(mol)
+        is_donor, is_acceptor = self._get_atom_state_info(mol_feats)
+        sssr = GetSymmSSSR(mol)
+        num_atoms = mol.GetNumAtoms()
+        atom_features = []
+        for i in range(num_atoms):
+            cur_atom_props = [float(is_donor[i]), float(is_acceptor[i])]
+            # Count the number of rings the atom belongs to for ring size between 3 and 8
+            count = [0 for _ in range(3, 9)]
+            for ring in sssr:
+                ring_size = len(ring)
+                if i in ring and 3 <= ring_size <= 8:
+                    count[ring_size - 3] += 1
+            cur_atom_props.extend(count)
+            atom_features.append(cur_atom_props)
+        return atom_features
+
+    def atom_weave_props(self, atom: dm.Atom):
+        """Get the WeaveNet properties for an atom"""
+        mol = atom.GetOwningMol()
+        feats = self._compute_weave_net_properties(mol)
+        return feats[atom.GetIdx()]
+
+    def __call__(self, mol: Union[dm.Mol, str], dtype: Callable = None):
+        """
+        Get rdkit basic descriptors for a molecule
+
+        Args:
+            mol: the molecule of interest
+            dtype: requested data type
+
+        Returns:
+            dict:  For each function in self.featurizer_funcs with the key ``k``, store the computed feature under the key ``k``.
+        """
+        AllChem.ComputeGasteigerCharges(mol)
+        return super().__call__(
+            mol,
+            dtype,
+        )
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __call__(mol, dtype=None) + +

+ + +
+ +

Compute Gasteiger charges on the molecule, then featurize its atoms with the parent calculator

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Union[Mol, str] + +
+

the molecule of interest

+
+
+ required +
dtype + Callable + +
+

requested data type

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
dict + +
+

For each function in self.featurizer_funcs with the key k, store the computed feature under the key k.

+
+
+ +
+ Source code in molfeat/calc/atom.py +
359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
def __call__(self, mol: Union[dm.Mol, str], dtype: Callable = None):
+    """
+    Get rdkit basic descriptors for a molecule
+
+    Args:
+        mol: the molecule of interest
+        dtype: requested data type
+
+    Returns:
+        dict:  For each function in self.featurizer_funcs with the key ``k``, store the computed feature under the key ``k``.
+    """
+    AllChem.ComputeGasteigerCharges(mol)
+    return super().__call__(
+        mol,
+        dtype,
+    )
+
+
+
+ +
+ + +
+ + + + +

+ atom_weave_props(atom) + +

+ + +
+ +

Get the WeaveNet properties for an atom

+ +
+ Source code in molfeat/calc/atom.py +
353
+354
+355
+356
+357
def atom_weave_props(self, atom: dm.Atom):
+    """Get the WeaveNet properties for an atom"""
+    mol = atom.GetOwningMol()
+    feats = self._compute_weave_net_properties(mol)
+    return feats[atom.GetIdx()]
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +

+

Bonds Featurizer

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ BondCalculator + + +

+ + +
+

+ Bases: SerializableCalculator

+ + +

A class for bond featurizer which loops over all bonds in a molecule and +featurizes them with the featurizer_funcs. The constructed graph is assumed to be +a bi-directed graph by default.

+ +
+ Source code in molfeat/calc/bond.py +
 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
class BondCalculator(SerializableCalculator):
+    """
+    A class for bond featurizer which loops over all bonds in a molecule and
+    featurizes them with the ``featurizer_funcs``. The constructed graph is assumed to be
+    a bi-directed graph by default.
+    """
+
+    DEFAULT_FEATURIZER = {
+        "bond_type_one_hot": bond_type_one_hot,
+        "bond_stereo_one_hot": bond_stereo_one_hot,
+        "bond_is_in_ring": bond_is_in_ring,
+        "bond_is_conjugated": bond_is_conjugated,
+        "bond_direction_one_hot": bond_direction_one_hot,
+    }
+
+    def __init__(
+        self,
+        featurizer_funcs: Union[list, dict] = None,
+        self_loop: bool = False,
+        concat: bool = True,
+        name: str = "he",
+    ):
+        """
+        Init function of the bond property calculator
+
+        Args:
+            featurizer_funcs: Mapping feature name to the featurization function.
+            self_loop: Whether self loops will be added. Default to False. If True, an additional
+                column of binary values to indicate the identity of self loops will be added.
+                The other features of the self loops will be zero.
+            concat: Whether to concat all the data into a single value in the output dict
+            name: Name of the key name of the concatenated features
+        """
+        self._input_kwargs = locals().copy()
+        self._input_kwargs.pop("self")
+        # remove featurizer_funcs too
+        self._input_kwargs.pop("featurizer_funcs", None)
+        self._toy_mol = dm.to_mol("CO")
+        self._feat_sizes = dict()
+        if featurizer_funcs is None:
+            featurizer_funcs = self.DEFAULT_FEATURIZER
+        if not isinstance(featurizer_funcs, dict):
+            get_name = lambda x: getattr(x, "__name__", repr(x))
+            featurizer_funcs = dict((get_name(x), x) for x in featurizer_funcs)
+        self.featurizer_funcs = featurizer_funcs
+        self._self_loop = self_loop
+        self.concat = concat
+        self.name = name
+        for k in self.featurizer_funcs.keys():
+            self.feat_size(feat_name=k)
+        if self._self_loop:
+            self._feat_sizes["self_loop"] = 1
+
+    def to_state_dict(self):
+        """Convert the Atom calculator to a state dict
+        Due to some constraints and cross-version compatibility,  the featurizer functions
+        need to be pickled and not just list
+        """
+        state_dict = {}
+        state_dict["name"] = self.__class__.__name__
+        state_dict["module"] = self.__class__.__module__
+        state_dict["args"] = self._input_kwargs
+
+        featurizer_fn_pickled = {}
+        for fname, ffunc in self.featurizer_funcs.items():
+            featurizer_fn_pickled[fname] = fn_to_hex(ffunc)
+        state_dict["args"]["featurizer_funcs"] = featurizer_fn_pickled
+        state_dict["_molfeat_version"] = MOLFEAT_VERSION
+        signature = inspect.signature(self.__init__)
+        val = {
+            k: v.default
+            for k, v in signature.parameters.items()
+            #    if v.default is not inspect.Parameter.empty
+        }
+        to_remove = [k for k in state_dict["args"] if k not in val.keys()]
+        for k in to_remove:
+            state_dict["args"].pop(k)
+        return state_dict
+
+    @classmethod
+    def from_state_dict(cls, state_dict, override_args: Optional[dict] = None):
+        """Create an instance of an atom calculator from a state dict
+
+        Args:
+            state_dict: state dictionary to use to create the atom calculator
+            override_args: optional dictionary of arguments to override the ones in the state dict
+                at construction of the new object
+        """
+        # EN: at this moment, version compatibility is not enforced
+        cls_name = state_dict.get("name", cls.__name__)
+        module_name = state_dict.get("module", cls.__module__)
+        module = importlib.import_module(module_name)
+        klass = getattr(module, cls_name)
+
+        kwargs = state_dict["args"].copy()
+        # now we need to unpickle the featurizer functions
+        featurizer_fn_pickled = kwargs.pop("featurizer_funcs", None)
+        if featurizer_fn_pickled is not None:
+            featurizer_fn_loaded = {}
+            for k, v in featurizer_fn_pickled.items():
+                featurizer_fn_loaded[k] = hex_to_fn(v)
+            kwargs["featurizer_funcs"] = featurizer_fn_loaded
+        kwargs.update(**(override_args or {}))
+        return klass(**kwargs)
+
+    def _concat(self, data_dict: Dict[str, Iterable]):
+        """Concatenate the data into a single value
+
+        Args:
+            data_dict: mapping of feature names to tensor/arrays
+        Returns:
+            concatenated_dict: a dict with a single key where all array have been concatenated
+        """
+        return concat_dict(data_dict, new_name=self.name)
+
+    def feat_size(self, feat_name: Optional[str] = None):
+        """Get the feature size for ``feat_name``.
+
+        When there is only one feature, ``feat_name`` can be None.
+
+        Args:
+            feat_name: Feature for query.
+
+        Returns:
+            int: Feature size for the feature with name ``feat_name``. Default to None.
+        """
+        if feat_name is None:
+            assert (
+                len(self.featurizer_funcs) == 1
+            ), "feat_name should be provided if there are more than one features"
+            feat_name = list(self.featurizer_funcs.keys())[0]
+
+        if feat_name not in self.featurizer_funcs:
+            raise ValueError(
+                "Expect feat_name to be in {}, got {}".format(
+                    list(self.featurizer_funcs.keys()), feat_name
+                )
+            )
+        if feat_name not in self._feat_sizes:
+            bond = self._toy_mol.GetBondWithIdx(0)
+            self._feat_sizes[feat_name] = len(self.featurizer_funcs[feat_name](bond))
+        return self._feat_sizes[feat_name]
+
+    def __len__(self):
+        """Get length of the property estimator"""
+        return sum(v for k, v in self._feat_sizes.items() if k != self.name)
+
+    def __call__(self, mol: Union[dm.Mol, str], dtype: Callable = None, **kwargs):
+        """Featurize all bonds in a molecule.
+
+        Args:
+            mol: the molecule of interest
+            dtype: requested data type
+
+        Returns:
+            dict: For each function in self.featurizer_funcs with the key ``k``,
+                store the computed feature under the key ``k``.
+        """
+        mol = dm.to_mol(mol)
+        num_bonds = mol.GetNumBonds()
+        bond_features = defaultdict(list)
+
+        # Compute features for each bond
+        for i in range(num_bonds):
+            bond = mol.GetBondWithIdx(i)
+            for feat_name, feat_func in self.featurizer_funcs.items():
+                feat = feat_func(bond)
+                bond_features[feat_name].extend([feat, feat.copy()])
+
+        # Stack the features and convert them to float arrays
+        processed_features = dict()
+        for feat_name, feat_list in bond_features.items():
+            feat = np.stack(feat_list)
+            processed_features[feat_name] = feat
+
+        if self._self_loop and num_bonds > 0:
+            num_atoms = mol.GetNumAtoms()
+            for feat_name in processed_features:
+                feats = processed_features[feat_name]
+                # add a new label that says the feat are not self loop
+                # feats = np.concatenate([feats, np.zeros((feats.shape[0], 1))], axis=1)
+                # add a label at the last position that says it's a selfloop
+                add_edges = np.zeros((num_atoms, feats.shape[1]))
+                # self_loop_feats[:, -1] = 1
+                feats = np.concatenate([feats, add_edges], axis=0)
+                processed_features[feat_name] = feats
+            self_loop_feats = np.concatenate(
+                [np.zeros((num_bonds * 2, 1)), np.ones((num_atoms, 1))]
+            )
+
+            processed_features["self_loop"] = self_loop_feats
+
+        if self._self_loop and num_bonds == 0:
+            num_atoms = mol.GetNumAtoms()
+            old_concat = self.concat
+            self.concat = False
+            processed_features = self(self._toy_mol)
+            self.concat = old_concat
+            for feat_name in processed_features:
+                feats = processed_features[feat_name]
+                feats = np.zeros((num_atoms, feats.shape[1]))
+                processed_features[feat_name] = feats
+        if self.concat and (num_bonds > 0 or self._self_loop):
+            processed_features = self._concat(processed_features)
+        if dtype is not None:
+            for feat_name, feat in processed_features.items():
+                feat = datatype.cast(feat, dtype=dtype)
+                processed_features[feat_name] = feat
+
+        return processed_features
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __call__(mol, dtype=None, **kwargs) + +

+ + +
+ +

Featurize all bonds in a molecule.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Union[Mol, str] + +
+

the molecule of interest

+
+
+ required +
dtype + Callable + +
+

requested data type

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
dict + +
+

For each function in self.featurizer_funcs with the key k, +store the computed feature under the key k.

+
+
+ +
+ Source code in molfeat/calc/bond.py +
178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
def __call__(self, mol: Union[dm.Mol, str], dtype: Callable = None, **kwargs):
+    """Featurize all bonds in a molecule.
+
+    Args:
+        mol: the molecule of interest
+        dtype: requested data type
+
+    Returns:
+        dict: For each function in self.featurizer_funcs with the key ``k``,
+            store the computed feature under the key ``k``.
+    """
+    mol = dm.to_mol(mol)
+    num_bonds = mol.GetNumBonds()
+    bond_features = defaultdict(list)
+
+    # Compute features for each bond
+    for i in range(num_bonds):
+        bond = mol.GetBondWithIdx(i)
+        for feat_name, feat_func in self.featurizer_funcs.items():
+            feat = feat_func(bond)
+            bond_features[feat_name].extend([feat, feat.copy()])
+
+    # Stack the features and convert them to float arrays
+    processed_features = dict()
+    for feat_name, feat_list in bond_features.items():
+        feat = np.stack(feat_list)
+        processed_features[feat_name] = feat
+
+    if self._self_loop and num_bonds > 0:
+        num_atoms = mol.GetNumAtoms()
+        for feat_name in processed_features:
+            feats = processed_features[feat_name]
+            # add a new label that says the feat are not self loop
+            # feats = np.concatenate([feats, np.zeros((feats.shape[0], 1))], axis=1)
+            # add a label at the last position that says it's a selfloop
+            add_edges = np.zeros((num_atoms, feats.shape[1]))
+            # self_loop_feats[:, -1] = 1
+            feats = np.concatenate([feats, add_edges], axis=0)
+            processed_features[feat_name] = feats
+        self_loop_feats = np.concatenate(
+            [np.zeros((num_bonds * 2, 1)), np.ones((num_atoms, 1))]
+        )
+
+        processed_features["self_loop"] = self_loop_feats
+
+    if self._self_loop and num_bonds == 0:
+        num_atoms = mol.GetNumAtoms()
+        old_concat = self.concat
+        self.concat = False
+        processed_features = self(self._toy_mol)
+        self.concat = old_concat
+        for feat_name in processed_features:
+            feats = processed_features[feat_name]
+            feats = np.zeros((num_atoms, feats.shape[1]))
+            processed_features[feat_name] = feats
+    if self.concat and (num_bonds > 0 or self._self_loop):
+        processed_features = self._concat(processed_features)
+    if dtype is not None:
+        for feat_name, feat in processed_features.items():
+            feat = datatype.cast(feat, dtype=dtype)
+            processed_features[feat_name] = feat
+
+    return processed_features
+
+
+
+ +
+ + +
+ + + + +

+ __init__(featurizer_funcs=None, self_loop=False, concat=True, name='he') + +

+ + +
+ +

Init function of the bond property calculator

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
featurizer_funcs + Union[list, dict] + +
+

Mapping feature name to the featurization function.

+
+
+ None +
self_loop + bool + +
+

Whether self loops will be added. Default to False. If True, an additional +column of binary values to indicate the identity of self loops will be added. +The other features of the self loops will be zero.

+
+
+ False +
concat + bool + +
+

Whether to concat all the data into a single value in the output dict

+
+
+ True +
name + str + +
+

Name of the key name of the concatenated features

+
+
+ 'he' +
+ +
+ Source code in molfeat/calc/bond.py +
46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
def __init__(
+    self,
+    featurizer_funcs: Union[list, dict] = None,
+    self_loop: bool = False,
+    concat: bool = True,
+    name: str = "he",
+):
+    """
+    Init function of the bond property calculator
+
+    Args:
+        featurizer_funcs: Mapping feature name to the featurization function.
+        self_loop: Whether self loops will be added. Default to False. If True, an additional
+            column of binary values to indicate the identity of self loops will be added.
+            The other features of the self loops will be zero.
+        concat: Whether to concat all the data into a single value in the output dict
+        name: Name of the key name of the concatenated features
+    """
+    self._input_kwargs = locals().copy()
+    self._input_kwargs.pop("self")
+    # remove featurizer_funcs too
+    self._input_kwargs.pop("featurizer_funcs", None)
+    self._toy_mol = dm.to_mol("CO")
+    self._feat_sizes = dict()
+    if featurizer_funcs is None:
+        featurizer_funcs = self.DEFAULT_FEATURIZER
+    if not isinstance(featurizer_funcs, dict):
+        get_name = lambda x: getattr(x, "__name__", repr(x))
+        featurizer_funcs = dict((get_name(x), x) for x in featurizer_funcs)
+    self.featurizer_funcs = featurizer_funcs
+    self._self_loop = self_loop
+    self.concat = concat
+    self.name = name
+    for k in self.featurizer_funcs.keys():
+        self.feat_size(feat_name=k)
+    if self._self_loop:
+        self._feat_sizes["self_loop"] = 1
+
+
+
+ +
+ + +
+ + + + +

+ __len__() + +

+ + +
+ +

Get length of the property estimator

+ +
+ Source code in molfeat/calc/bond.py +
174
+175
+176
def __len__(self):
+    """Get length of the property estimator"""
+    return sum(v for k, v in self._feat_sizes.items() if k != self.name)
+
+
+
+ +
+ + +
+ + + + +

+ feat_size(feat_name=None) + +

+ + +
+ +

Get the feature size for feat_name.

+

When there is only one feature, feat_name can be None.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
feat_name + Optional[str] + +
+

Feature for query.

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
int + +
+

Feature size for the feature with name feat_name. Default to None.

+
+
+ +
+ Source code in molfeat/calc/bond.py +
146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
def feat_size(self, feat_name: Optional[str] = None):
+    """Get the feature size for ``feat_name``.
+
+    When there is only one feature, ``feat_name`` can be None.
+
+    Args:
+        feat_name: Feature for query.
+
+    Returns:
+        int: Feature size for the feature with name ``feat_name``. Default to None.
+    """
+    if feat_name is None:
+        assert (
+            len(self.featurizer_funcs) == 1
+        ), "feat_name should be provided if there are more than one features"
+        feat_name = list(self.featurizer_funcs.keys())[0]
+
+    if feat_name not in self.featurizer_funcs:
+        raise ValueError(
+            "Expect feat_name to be in {}, got {}".format(
+                list(self.featurizer_funcs.keys()), feat_name
+            )
+        )
+    if feat_name not in self._feat_sizes:
+        bond = self._toy_mol.GetBondWithIdx(0)
+        self._feat_sizes[feat_name] = len(self.featurizer_funcs[feat_name](bond))
+    return self._feat_sizes[feat_name]
+
+
+
+ +
+ + +
+ + + + +

+ from_state_dict(state_dict, override_args=None) + + + classmethod + + +

+ + +
+ +

Create an instance of a bond calculator from a state dict

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
state_dict + +
+

state dictionary to use to create the bond calculator

+
+
+ required +
override_args + Optional[dict] + +
+

optional dictionary of arguments to override the ones in the state dict +at construction of the new object

+
+
+ None +
+ +
+ Source code in molfeat/calc/bond.py +
110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
@classmethod
+def from_state_dict(cls, state_dict, override_args: Optional[dict] = None):
+    """Create an instance of an atom calculator from a state dict
+
+    Args:
+        state_dict: state dictionary to use to create the atom calculator
+        override_args: optional dictionary of arguments to override the ones in the state dict
+            at construction of the new object
+    """
+    # EN: at this moment, version compatibility is not enforced
+    cls_name = state_dict.get("name", cls.__name__)
+    module_name = state_dict.get("module", cls.__module__)
+    module = importlib.import_module(module_name)
+    klass = getattr(module, cls_name)
+
+    kwargs = state_dict["args"].copy()
+    # now we need to unpickle the featurizer functions
+    featurizer_fn_pickled = kwargs.pop("featurizer_funcs", None)
+    if featurizer_fn_pickled is not None:
+        featurizer_fn_loaded = {}
+        for k, v in featurizer_fn_pickled.items():
+            featurizer_fn_loaded[k] = hex_to_fn(v)
+        kwargs["featurizer_funcs"] = featurizer_fn_loaded
+    kwargs.update(**(override_args or {}))
+    return klass(**kwargs)
+
+
+
+ +
+ + +
+ + + + +

+ to_state_dict() + +

+ + +
+ +

Convert the bond calculator to a state dict. +Due to some constraints and cross-version compatibility, the featurizer functions +need to be pickled and not just listed.

+ +
+ Source code in molfeat/calc/bond.py +
 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
def to_state_dict(self):
+    """Convert the Atom calculator to a state dict
+    Due to some constraints and cross-version compatibility,  the featurizer functions
+    need to be pickled and not just list
+    """
+    state_dict = {}
+    state_dict["name"] = self.__class__.__name__
+    state_dict["module"] = self.__class__.__module__
+    state_dict["args"] = self._input_kwargs
+
+    featurizer_fn_pickled = {}
+    for fname, ffunc in self.featurizer_funcs.items():
+        featurizer_fn_pickled[fname] = fn_to_hex(ffunc)
+    state_dict["args"]["featurizer_funcs"] = featurizer_fn_pickled
+    state_dict["_molfeat_version"] = MOLFEAT_VERSION
+    signature = inspect.signature(self.__init__)
+    val = {
+        k: v.default
+        for k, v in signature.parameters.items()
+        #    if v.default is not inspect.Parameter.empty
+    }
+    to_remove = [k for k in state_dict["args"] if k not in val.keys()]
+    for k in to_remove:
+        state_dict["args"].pop(k)
+    return state_dict
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ DGLCanonicalBondCalculator + + +

+ + +
+

+ Bases: BondCalculator

+ + +
+ Source code in molfeat/calc/bond.py +
404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
+416
+417
+418
+419
+420
class DGLCanonicalBondCalculator(BondCalculator):
+    DEFAULT_FEATURIZER = {
+        "bond_type_one_hot": bond_type_one_hot,
+        "bond_is_conjugated": bond_is_conjugated,
+        "bond_is_in_ring": bond_is_in_ring,
+        "bond_stereo_one_hot": bond_stereo_one_hot,
+    }
+
+    def _concat(self, data_dict: Dict[str, Iterable]):
+        """Concatenate the data into a single value
+
+        Args:
+            data_dict: mapping of feature names to tensor/arrays
+        Returns:
+            concatenated_dict: a dict with a single key where all array have been concatenated
+        """
+        return concat_dict(data_dict, new_name=self.name, order=list(self.featurizer_funcs.keys()))
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + + + +

+ DGLWeaveEdgeCalculator + + +

+ + +
+

+ Bases: EdgeMatCalculator

+ + +

Edge featurizer used by WeaveNets

+

The edge featurization is introduced in Molecular Graph Convolutions: +Moving Beyond Fingerprints <https://arxiv.org/abs/1603.00856>__.

+

This featurization is performed for a complete graph of atoms with self loops added, +which considers the following default:

+
    +
  • Number of bonds between each pair of atoms
  • +
  • One-hot encoding of bond type if a bond exists between a pair of atoms
  • +
  • Whether a pair of atoms belongs to a same ring
  • +
+ +
+ Source code in molfeat/calc/bond.py +
423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
+449
+450
+451
+452
+453
+454
+455
+456
+457
class DGLWeaveEdgeCalculator(EdgeMatCalculator):
+    """Edge featurizer used by WeaveNets
+
+    The edge featurization is introduced in `Molecular Graph Convolutions:
+    Moving Beyond Fingerprints <https://arxiv.org/abs/1603.00856>`__.
+
+    This featurization is performed for a complete graph of atoms with self loops added,
+    which considers the following default:
+
+    * Number of bonds between each pairs of atoms
+    * One-hot encoding of bond type if a bond exists between a pair of atoms
+    * Whether a pair of atoms belongs to a same ring
+
+    """
+
+    DEFAULT_FEATURIZER = {}
+    DEFAULT_PAIRWISE_FEATURIZER = {
+        "pairwise_dist_indicator": pairwise_dist_indicator,
+        "pairwise_bond_indicator": pairwise_bond_indicator,
+        "pairwise_ring_membership": pairwise_ring_membership,
+    }
+
+    def _concat(self, data_dict: Dict[str, Iterable]):
+        """Concatenate the data into a single value
+
+        Args:
+            data_dict: mapping of feature names to tensor/arrays
+        Returns:
+            concatenated_dict: a dict with a single key where all array have been concatenated
+        """
+
+        # To reproduce DGLDefault, we need to keep the order of dict insertion
+        return concat_dict(
+            data_dict, new_name=self.name, order=list(self.pairwise_atom_funcs.keys())
+        )
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + + + +

+ EdgeMatCalculator + + +

+ + +
+

+ Bases: BondCalculator

+ + +

Generate edge featurizer matrix

+ +
+ Source code in molfeat/calc/bond.py +
243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
class EdgeMatCalculator(BondCalculator):
+    """Generate edge featurizer matrix"""
+
+    DEFAULT_PAIRWISE_FEATURIZER = {
+        "pairwise_2D_dist": pairwise_2D_dist,
+        # "pairwise_3D_dist": pairwise_3D_dist,
+        "pairwise_ring_membership": pairwise_ring_membership,
+    }
+
+    def __init__(
+        self,
+        featurizer_funcs: Union[list, dict] = None,
+        pairwise_atom_funcs: Union[list, dict, str] = "default",
+        name: str = "he",
+    ):
+        """
+        Init function of the edge matrix property calculator
+
+        Args:
+            featurizer_funcs: Mapping feature name to the featurization function.
+            pairwise_atom_funcs: Mapping feature name to pairwise featurization function.
+                Use the keywords "default" for the default values
+        """
+        if pairwise_atom_funcs == "default":
+            pairwise_atom_funcs = self.DEFAULT_PAIRWISE_FEATURIZER
+        if not isinstance(pairwise_atom_funcs, dict):
+            get_name = lambda x: getattr(x, "__name__", repr(x))
+            pairwise_atom_funcs = dict((get_name(x), x) for x in pairwise_atom_funcs)
+        self.pairwise_atom_funcs = pairwise_atom_funcs
+        super().__init__(featurizer_funcs=featurizer_funcs, concat=True, name=name)
+        # add conf data to toy mol
+        self._toy_mol = dm.conformers.generate(self._toy_mol, n_confs=1, minimize_energy=False)
+        for k in self.pairwise_atom_funcs.keys():
+            self.feat_size(feat_name=k)
+
+    def to_state_dict(self):
+        """Convert the Atom calculator to a state dict
+        Due to some constraints and cross-version compatibility,  the featurizer functions
+        need to be pickled and not just list
+        """
+        state_dict = super().to_state_dict()
+        # repeat for the pairwise one
+        pairwise_atom_fn_pickled = {}
+        for fname, ffunc in self.pairwise_atom_funcs.items():
+            pairwise_atom_fn_pickled[fname] = fn_to_hex(ffunc)
+        state_dict["args"]["pairwise_atom_funcs"] = pairwise_atom_fn_pickled
+        return state_dict
+
+    @classmethod
+    def from_state_dict(cls, state_dict, override_args: Optional[dict] = None):
+        """Create an instance of an atom calculator from a state dict
+
+        Args:
+            state_dict: state dictionary to use to create the atom calculator
+            override_args: optional dictionary of arguments to override the ones in the state dict
+                at construction of the new object
+        """
+        # EN: at this moment, version compatibility is not enforced
+        cls_name = state_dict.get("name", cls.__name__)
+        module_name = state_dict.get("module", cls.__module__)
+        module = importlib.import_module(module_name)
+        klass = getattr(module, cls_name)
+
+        kwargs = state_dict["args"].copy()
+        # now we need to unpickle the featurizer functions
+        featurizer_fn_pickled = kwargs.pop("featurizer_funcs", None)
+        if featurizer_fn_pickled is not None:
+            featurizer_fn_loaded = {}
+            for k, v in featurizer_fn_pickled.items():
+                featurizer_fn_loaded[k] = hex_to_fn(v)
+            kwargs["featurizer_funcs"] = featurizer_fn_loaded
+
+        pairwise_atom_fn_pickled = kwargs.pop("pairwise_atom_funcs", None)
+        if pairwise_atom_fn_pickled is not None:
+            pairwise_atom_fn_loaded = {}
+            for k, v in pairwise_atom_fn_pickled.items():
+                pairwise_atom_fn_loaded[k] = hex_to_fn(v)
+            kwargs["pairwise_atom_funcs"] = pairwise_atom_fn_loaded
+        kwargs.update(**(override_args or {}))
+        return klass(**kwargs)
+
+    def feat_size(self, feat_name: Optional[str] = None):
+        """Get the feature size for ``feat_name``.
+
+        Args:
+            feat_name: Feature for query.
+
+        Returns:
+            int: Feature size for the feature with name ``feat_name``. Default to None.
+        """
+        if feat_name not in self.featurizer_funcs and feat_name not in self.pairwise_atom_funcs:
+            raise ValueError(
+                "Expect feat_name to be in {}, got {}".format(
+                    list(self.featurizer_funcs.keys()), feat_name
+                )
+            )
+        if feat_name not in self._feat_sizes:
+            if feat_name in self.featurizer_funcs:
+                bond = self._toy_mol.GetBondWithIdx(0)
+                self._feat_sizes[feat_name] = len(self.featurizer_funcs[feat_name](bond))
+            elif feat_name in self.pairwise_atom_funcs:
+                self._feat_sizes[feat_name] = self.pairwise_atom_funcs[feat_name](
+                    self._toy_mol
+                ).shape[-1]
+            else:
+                raise ValueError(f"Feature name {feat_name} is not defined !")
+        return self._feat_sizes[feat_name]
+
+    def __call__(self, mol: Union[dm.Mol, str], dtype: Callable = None, flat: bool = True):
+        """Featurize all bonds in a molecule.
+
+        Args:
+            mol: the molecule of interest
+            dtype: requested data type
+            flat: whether to return a collapsed N^2, M or a N, N, M matrix
+
+        Returns:
+            dict: For each function in self.featurizer_funcs with the key ``k``,
+                store the computed feature under the key ``k``.
+        """
+
+        mol = dm.to_mol(mol)
+        num_bonds = mol.GetNumBonds()
+        num_atoms = mol.GetNumAtoms()
+        feat_size = len(self)
+        edge_matrix = None
+
+        if self.pairwise_atom_funcs is not None:
+            feat_size -= sum(self._feat_sizes[x] for x in self.pairwise_atom_funcs.keys())
+        if self.featurizer_funcs is not None and len(self.featurizer_funcs) > 0:
+            edge_matrix = np.zeros((num_atoms, num_atoms, feat_size))
+            # Compute features for each bond
+            for i in range(num_bonds):
+                bond = mol.GetBondWithIdx(i)
+                a_idx_1 = bond.GetBeginAtomIdx()
+                a_idx_2 = bond.GetEndAtomIdx()
+                bond_features = defaultdict(list)
+                for feat_name, feat_func in self.featurizer_funcs.items():
+                    feat = feat_func(bond)
+                    bond_features[feat_name].extend([feat])
+                bond_features = self._concat(bond_features)[self.name]
+                edge_matrix[a_idx_1, a_idx_2] = bond_features
+                edge_matrix[a_idx_2, a_idx_1] = bond_features
+
+            edge_matrix = edge_matrix.reshape(-1, feat_size)
+        if self.pairwise_atom_funcs is not None:
+            pwise_features = dict()
+            for pname, pfunc in self.pairwise_atom_funcs.items():
+                pwise_features[pname] = pfunc(mol)
+            pwise_features = self._concat(pwise_features)[self.name]
+            if edge_matrix is not None:
+                edge_matrix = np.concatenate([edge_matrix, pwise_features], axis=-1)
+            else:
+                edge_matrix = pwise_features
+        if not flat:
+            edge_matrix = edge_matrix.reshape(num_atoms, num_atoms, -1)
+        if dtype is not None:
+            edge_matrix = datatype.cast(edge_matrix, dtype=dtype)
+        return {self.name: edge_matrix}
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __call__(mol, dtype=None, flat=True) + +

+ + +
+ +

Featurize all bonds in a molecule.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Union[Mol, str] + +
+

the molecule of interest

+
+
+ required +
dtype + Callable + +
+

requested data type

+
+
+ None +
flat + bool + +
+

whether to return a collapsed N^2, M or a N, N, M matrix

+
+
+ True +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
dict + +
+

A dict with a single entry: the calculator name (self.name) mapped to the edge feature matrix, +collapsed to N^2, M when flat is True, or shaped N, N, M otherwise.

+
+
+ +
+ Source code in molfeat/calc/bond.py +
351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
def __call__(self, mol: Union[dm.Mol, str], dtype: Callable = None, flat: bool = True):
+    """Featurize all bonds in a molecule.
+
+    Args:
+        mol: the molecule of interest
+        dtype: requested data type
+        flat: whether to return a collapsed N^2, M or a N, N, M matrix
+
+    Returns:
+        dict: For each function in self.featurizer_funcs with the key ``k``,
+            store the computed feature under the key ``k``.
+    """
+
+    mol = dm.to_mol(mol)
+    num_bonds = mol.GetNumBonds()
+    num_atoms = mol.GetNumAtoms()
+    feat_size = len(self)
+    edge_matrix = None
+
+    if self.pairwise_atom_funcs is not None:
+        feat_size -= sum(self._feat_sizes[x] for x in self.pairwise_atom_funcs.keys())
+    if self.featurizer_funcs is not None and len(self.featurizer_funcs) > 0:
+        edge_matrix = np.zeros((num_atoms, num_atoms, feat_size))
+        # Compute features for each bond
+        for i in range(num_bonds):
+            bond = mol.GetBondWithIdx(i)
+            a_idx_1 = bond.GetBeginAtomIdx()
+            a_idx_2 = bond.GetEndAtomIdx()
+            bond_features = defaultdict(list)
+            for feat_name, feat_func in self.featurizer_funcs.items():
+                feat = feat_func(bond)
+                bond_features[feat_name].extend([feat])
+            bond_features = self._concat(bond_features)[self.name]
+            edge_matrix[a_idx_1, a_idx_2] = bond_features
+            edge_matrix[a_idx_2, a_idx_1] = bond_features
+
+        edge_matrix = edge_matrix.reshape(-1, feat_size)
+    if self.pairwise_atom_funcs is not None:
+        pwise_features = dict()
+        for pname, pfunc in self.pairwise_atom_funcs.items():
+            pwise_features[pname] = pfunc(mol)
+        pwise_features = self._concat(pwise_features)[self.name]
+        if edge_matrix is not None:
+            edge_matrix = np.concatenate([edge_matrix, pwise_features], axis=-1)
+        else:
+            edge_matrix = pwise_features
+    if not flat:
+        edge_matrix = edge_matrix.reshape(num_atoms, num_atoms, -1)
+    if dtype is not None:
+        edge_matrix = datatype.cast(edge_matrix, dtype=dtype)
+    return {self.name: edge_matrix}
+
+
+
+ +
+ + +
+ + + + +

+ __init__(featurizer_funcs=None, pairwise_atom_funcs='default', name='he') + +

+ + +
+ +

Init function of the edge matrix property calculator

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
featurizer_funcs + Union[list, dict] + +
+

Mapping feature name to the featurization function.

+
+
+ None +
pairwise_atom_funcs + Union[list, dict, str] + +
+

Mapping feature name to pairwise featurization function. +Use the keywords "default" for the default values

+
+
+ 'default' +
+ +
+ Source code in molfeat/calc/bond.py +
252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
def __init__(
+    self,
+    featurizer_funcs: Union[list, dict] = None,
+    pairwise_atom_funcs: Union[list, dict, str] = "default",
+    name: str = "he",
+):
+    """
+    Init function of the edge matrix property calculator
+
+    Args:
+        featurizer_funcs: Mapping feature name to the featurization function.
+        pairwise_atom_funcs: Mapping feature name to pairwise featurization function.
+            Use the keywords "default" for the default values
+    """
+    if pairwise_atom_funcs == "default":
+        pairwise_atom_funcs = self.DEFAULT_PAIRWISE_FEATURIZER
+    if not isinstance(pairwise_atom_funcs, dict):
+        get_name = lambda x: getattr(x, "__name__", repr(x))
+        pairwise_atom_funcs = dict((get_name(x), x) for x in pairwise_atom_funcs)
+    self.pairwise_atom_funcs = pairwise_atom_funcs
+    super().__init__(featurizer_funcs=featurizer_funcs, concat=True, name=name)
+    # add conf data to toy mol
+    self._toy_mol = dm.conformers.generate(self._toy_mol, n_confs=1, minimize_energy=False)
+    for k in self.pairwise_atom_funcs.keys():
+        self.feat_size(feat_name=k)
+
+
+
+ +
+ + +
+ + + + +

+ feat_size(feat_name=None) + +

+ + +
+ +

Get the feature size for feat_name.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
feat_name + Optional[str] + +
+

Feature for query.

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
int + +
+

Feature size for the feature with name feat_name. Default to None.

+
+
+ +
+ Source code in molfeat/calc/bond.py +
324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
def feat_size(self, feat_name: Optional[str] = None):
+    """Get the feature size for ``feat_name``.
+
+    Args:
+        feat_name: Feature for query.
+
+    Returns:
+        int: Feature size for the feature with name ``feat_name``. Default to None.
+    """
+    if feat_name not in self.featurizer_funcs and feat_name not in self.pairwise_atom_funcs:
+        raise ValueError(
+            "Expect feat_name to be in {}, got {}".format(
+                list(self.featurizer_funcs.keys()), feat_name
+            )
+        )
+    if feat_name not in self._feat_sizes:
+        if feat_name in self.featurizer_funcs:
+            bond = self._toy_mol.GetBondWithIdx(0)
+            self._feat_sizes[feat_name] = len(self.featurizer_funcs[feat_name](bond))
+        elif feat_name in self.pairwise_atom_funcs:
+            self._feat_sizes[feat_name] = self.pairwise_atom_funcs[feat_name](
+                self._toy_mol
+            ).shape[-1]
+        else:
+            raise ValueError(f"Feature name {feat_name} is not defined !")
+    return self._feat_sizes[feat_name]
+
+
+
+ +
+ + +
+ + + + +

+ from_state_dict(state_dict, override_args=None) + + + classmethod + + +

+ + +
+ +

Create an instance of an edge matrix calculator from a state dict

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
state_dict + +
+

state dictionary to use to create the edge matrix calculator

+
+
+ required +
override_args + Optional[dict] + +
+

optional dictionary of arguments to override the ones in the state dict +at construction of the new object

+
+
+ None +
+ +
+ Source code in molfeat/calc/bond.py +
291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
@classmethod
+def from_state_dict(cls, state_dict, override_args: Optional[dict] = None):
+    """Create an instance of an atom calculator from a state dict
+
+    Args:
+        state_dict: state dictionary to use to create the atom calculator
+        override_args: optional dictionary of arguments to override the ones in the state dict
+            at construction of the new object
+    """
+    # EN: at this moment, version compatibility is not enforced
+    cls_name = state_dict.get("name", cls.__name__)
+    module_name = state_dict.get("module", cls.__module__)
+    module = importlib.import_module(module_name)
+    klass = getattr(module, cls_name)
+
+    kwargs = state_dict["args"].copy()
+    # now we need to unpickle the featurizer functions
+    featurizer_fn_pickled = kwargs.pop("featurizer_funcs", None)
+    if featurizer_fn_pickled is not None:
+        featurizer_fn_loaded = {}
+        for k, v in featurizer_fn_pickled.items():
+            featurizer_fn_loaded[k] = hex_to_fn(v)
+        kwargs["featurizer_funcs"] = featurizer_fn_loaded
+
+    pairwise_atom_fn_pickled = kwargs.pop("pairwise_atom_funcs", None)
+    if pairwise_atom_fn_pickled is not None:
+        pairwise_atom_fn_loaded = {}
+        for k, v in pairwise_atom_fn_pickled.items():
+            pairwise_atom_fn_loaded[k] = hex_to_fn(v)
+        kwargs["pairwise_atom_funcs"] = pairwise_atom_fn_loaded
+    kwargs.update(**(override_args or {}))
+    return klass(**kwargs)
+
+
+
+ +
+ + +
+ + + + +

+ to_state_dict() + +

+ + +
+ +

Convert the edge matrix calculator to a state dict. +Due to some constraints and cross-version compatibility, the featurizer functions +need to be pickled and not just listed.

+ +
+ Source code in molfeat/calc/bond.py +
278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
def to_state_dict(self):
+    """Convert the Atom calculator to a state dict
+    Due to some constraints and cross-version compatibility,  the featurizer functions
+    need to be pickled and not just list
+    """
+    state_dict = super().to_state_dict()
+    # repeat for the pairwise one
+    pairwise_atom_fn_pickled = {}
+    for fname, ffunc in self.pairwise_atom_funcs.items():
+        pairwise_atom_fn_pickled[fname] = fn_to_hex(ffunc)
+    state_dict["args"]["pairwise_atom_funcs"] = pairwise_atom_fn_pickled
+    return state_dict
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +

+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.plugins.html b/0.9.7/api/molfeat.plugins.html new file mode 100644 index 0000000..82f625f --- /dev/null +++ b/0.9.7/api/molfeat.plugins.html @@ -0,0 +1,3679 @@ + + + + + + + + + + + + + + + + + + + + + + + + + molfeat.plugins - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

molfeat.plugins

+ + +
+ + + + +
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + + + +
+ + + +
+ + + + + + + + + + +
+ + + + +

+ BaseFactory(group, name, load=True) + +

+ + +
+ +

Return the plugin class registered under a given entry point group and name.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
group + str + +
+

entry point group

+
+
+ required +
name + str + +
+

entry point name

+
+
+ required +
load + bool + +
+

if True, load the matched entry point and return the loaded resource instead of the entry point itself.

+
+
+ True +
+

Return: + the plugin class +Raises: + MissingEntryPointError: entry point was not registered + MultipleEntryPointError: entry point could not be uniquely resolved + LoadingEntryPointError: entry point could not be loaded

+ +
+ Source code in molfeat/plugins/factories.py +
59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
def BaseFactory(group: str, name: str, load: bool = True) -> Union[EntryPoint, Any]:
+    """Return the plugin class registered under a given entry point group and name.
+
+    Args:
+        group: entry point group
+        name: entry point name
+        load: if True, load the matched entry point and return the loaded resource instead of the entry point itself.
+    Return:
+        the plugin class
+    Raises:
+        MissingEntryPointError: entry point was not registered
+        MultipleEntryPointError: entry point could not be uniquely resolved
+        LoadingEntryPointError: entry point could not be loaded
+    """
+    # circular import
+    from .entry_point import get_entry_point, load_entry_point
+
+    if load is True:
+        return load_entry_point(group, name)
+
+    return get_entry_point(group, name)
+
+
+
+ +
+ + +
+ + + + +

+ CalculatorFactory(entry_point_name, load=True, entry_point_group=None) + +

+ + +
+ +

Return the SerializableCalculator sub class registered under the given entry point.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
entry_point_name + str + +
+

the entry point name.

+
+
+ required +
load + bool + +
+

if True, load the matched entry point and return the loaded resource instead of the entry point itself.

+
+
+ True +
entry_point_group + Optional[str] + +
+

the optional entry point group to use

+
+
+ None +
+ +
+ Return +

sub class of :py:class:~molfeat.calc.SerializableCalculator

+
+
+ Source code in molfeat/plugins/factories.py +
 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
def CalculatorFactory(
+    entry_point_name: str,
+    load: bool = True,
+    entry_point_group: Optional[str] = None,
+) -> Union[EntryPoint, Type["SerializableCalculator"], Callable]:
+    """Return the `SerializableCalculator` sub class registered under the given entry point.
+
+    Args:
+        entry_point_name: the entry point name.
+        load: if True, load the matched entry point and return the loaded resource instead of the entry point itself.
+        entry_point_group: the optional entry point group to use
+
+    Return:
+        sub class of :py:class:`~molfeat.calc.SerializableCalculator`
+    """
+    from molfeat.calc import SerializableCalculator
+
+    if entry_point_group is None:
+        entry_point_group = "molfeat.calc"
+    entry_point = BaseFactory(entry_point_group, entry_point_name, load=load)
+    valid_classes = (SerializableCalculator,)
+
+    if not load:
+        return entry_point
+
+    # if the entry point is a module, nothing to do
+    if ismodule(entry_point):
+        return entry_point
+    if isclass(entry_point) and issubclass(entry_point, valid_classes):
+        return entry_point
+
+    raise_invalid_type_error(entry_point_name, entry_point_group, valid_classes)
+
+
+
+ +
+ + +
+ + + + +

+ DefaultFactory(entry_point_name, load=True, entry_point_group=None) + +

+ + +
+ +

Return the Default factory for extending capabilities given a specific module.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
entry_point_name + str + +
+

the entry point name.

+
+
+ required +
load + bool + +
+

if True, load the matched entry point and return the loaded resource instead of the entry point itself.

+
+
+ True +
entry_point_group + str + +
+

the optional entry point group to use

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Union[EntryPoint, Type[PretrainedMolTransformer], Callable] + +
+

the entry point itself when load is False, otherwise the loaded module registered under the entry point

+
+
+ +
+ Raise +

InvalidEntryPointTypeError: if the type of the loaded entry point is invalid.

+
+
+ Source code in molfeat/plugins/factories.py +
253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
def DefaultFactory(
+    entry_point_name: str,
+    load: bool = True,
+    entry_point_group: str = None,
+) -> Union[EntryPoint, Type["PretrainedMolTransformer"], Callable]:
+    """Return the Default factory for extending capabilities given a specific module.
+
+    Args:
+        entry_point_name: the entry point name.
+        load: if True, load the matched entry point and return the loaded resource instead of the entry point itself.
+        entry_point_group: the optional entry point group to use
+
+    Returns:
+        sub class or module of
+
+    Raise:
+        InvalidEntryPointTypeError: if the type of the loaded entry point is invalid.
+    """
+
+    if entry_point_group is None:
+        entry_point_group = "molfeat"
+    entry_point = BaseFactory(entry_point_group, entry_point_name, load=load)
+
+    if not load:
+        return entry_point
+    # if the entry point is a module, nothing to do
+    if ismodule(entry_point):
+        return entry_point
+    raise_invalid_type_error(entry_point_name, entry_point_group, ())
+
+
+
+ +
+ + +
+ + + + +

+ PretrainedTransformerFactory(entry_point_name, load=True, entry_point_group=None) + +

+ + +
+ +

Return the PretrainedMolTransformer sub class registered under the given entry point.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
entry_point_name + str + +
+

the entry point name.

+
+
+ required +
load + bool + +
+

if True, load the matched entry point and return the loaded resource instead of the entry point itself.

+
+
+ True +
entry_point_group + Optional[str] + +
+

the optional entry point group to use

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Union[EntryPoint, Type[PretrainedMolTransformer], Callable] + +
+

sub class of :py:class:~molfeat.trans.pretrained.PretrainedMolTransformer

+
+
+ +
+ Raise +

InvalidEntryPointTypeError: if the type of the loaded entry point is invalid.

+
+
+ Source code in molfeat/plugins/factories.py +
200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
def PretrainedTransformerFactory(
+    entry_point_name: str,
+    load: bool = True,
+    entry_point_group: Optional[str] = None,
+) -> Union[EntryPoint, Type["PretrainedMolTransformer"], Callable]:
+    """Return the PretrainedMolTransformer sub class registered under the given entry point.
+
+    Args:
+        entry_point_name: the entry point name.
+        load: if True, load the matched entry point and return the loaded resource instead of the entry point itself.
+        entry_point_group: the optional entry point group to use
+
+    Returns:
+        sub class of :py:class:`~molfeat.trans.pretrained.PretrainedMolTransformer`
+
+    Raise:
+        InvalidEntryPointTypeError: if the type of the loaded entry point is invalid.
+    """
+    from molfeat.trans import MoleculeTransformer
+    from molfeat.trans.pretrained import PretrainedMolTransformer
+
+    if entry_point_group is None:
+        entry_point_group = "molfeat.trans.pretrained"
+    entry_point = BaseFactory(entry_point_group, entry_point_name, load=load)
+    valid_classes = (PretrainedMolTransformer, MoleculeTransformer)
+
+    if not load:
+        return entry_point
+    # if the entry point is a module, nothing to do
+    if ismodule(entry_point):
+        return entry_point
+    if isclass(entry_point) and issubclass(entry_point, valid_classes):
+        return entry_point
+
+    raise_invalid_type_error(entry_point_name, entry_point_group, valid_classes)
+
+
+
+ +
+ + +
+ + + + +

+ TransformerFactory(entry_point_name, load=True, entry_point_group=None) + +

+ + +
+ +

Return the MoleculeTransformer sub class registered under the given entry point.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
entry_point_name + str + +
+

the entry point name.

+
+
+ required +
load + bool + +
+

if True, load the matched entry point and return the loaded resource instead of the entry point itself.

+
+
+ True +
entry_point_group + Optional[str] + +
+

the optional entry point group to use

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Union[EntryPoint, Type[MoleculeTransformer], Callable] + +
+

sub class of :py:class:~molfeat.trans.MoleculeTransformer

+
+
+ +
+ Raise +

InvalidEntryPointTypeError: if the type of the loaded entry point is invalid.

+
+
+ Source code in molfeat/plugins/factories.py +
148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
def TransformerFactory(
+    entry_point_name: str,
+    load: bool = True,
+    entry_point_group: Optional[str] = None,
+) -> Union[EntryPoint, Type["MoleculeTransformer"], Callable]:
+    """Return the `MoleculeTransformer` sub class registered under the given entry point.
+
+    Args:
+        entry_point_name: the entry point name.
+        load: if True, load the matched entry point and return the loaded resource instead of the entry point itself.
+        entry_point_group: the optional entry point group to use
+
+    Returns:
+        sub class of :py:class:`~molfeat.trans.MoleculeTransformer`
+
+    Raise:
+        InvalidEntryPointTypeError: if the type of the loaded entry point is invalid.
+    """
+    from molfeat.trans import MoleculeTransformer
+    from molfeat.trans import BaseFeaturizer
+
+    if entry_point_group is None:
+        entry_point_group = "molfeat.trans"
+    entry_point = BaseFactory(entry_point_group, entry_point_name, load=load)
+    valid_classes = (MoleculeTransformer, BaseFeaturizer)
+
+    if not load:
+        return entry_point
+
+    # if the entry point is a module, nothing to do
+    if ismodule(entry_point):
+        return entry_point
+    if isclass(entry_point) and issubclass(entry_point, valid_classes):
+        return entry_point
+
+    raise_invalid_type_error(entry_point_name, entry_point_group, valid_classes)
+
+
+
+ +
+ + +
+ + + + +

+ raise_invalid_type_error(entry_point_name, entry_point_group, valid_classes) + +

+ + +
+ +

Raise an InvalidEntryPointTypeError with formatted message.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
entry_point_name + str + +
+

name of the entry point

+
+
+ required +
entry_point_group + str + +
+

name of the entry point group

+
+
+ required +
valid_classes + Tuple[Any, ...] + +
+

tuple of valid classes for the given entry point group

+
+
+ required +
+ + + +

Raises:

+ + + + + + + + + + + + + +
TypeDescription
+ InvalidEntryPointTypeError + +
+

always

+
+
+ +
+ Source code in molfeat/plugins/factories.py +
37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
def raise_invalid_type_error(
+    entry_point_name: str, entry_point_group: str, valid_classes: Tuple[Any, ...]
+) -> NoReturn:
+    """Raise an `InvalidEntryPointTypeError` with formatted message.
+
+    Args:
+        entry_point_name: name of the entry point
+        entry_point_group: name of the entry point group
+        valid_classes: tuple of valid classes for the given entry point group
+
+    Raises:
+        InvalidEntryPointTypeError: always
+    """
+    template = "entry point `{}` registered in group `{}` is invalid because its type is not one of the supported types ({})"
+    args = (
+        entry_point_name,
+        entry_point_group,
+        ", ".join([e.__name__ for e in valid_classes]),
+    )
+    raise InvalidEntryPointTypeError(template.format(*args))
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +
+ + + +
+ + + + + + + + + + +
+ + + + +

+ get_entry_point(group, name) + +

+ + +
+ +

Return an entry point with a given name within a specific group +Args: + group: the entry point group + name: the name of the entry point

+ +
+ Source code in molfeat/plugins/entry_point.py +
 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
def get_entry_point(group: str, name: str) -> EntryPoint:
+    """
+    Return an entry point with a given name within a specific group
+    Args:
+        group: the entry point group
+        name: the name of the entry point
+    """
+    found = eps().select(group=group, name=name)
+    if name not in found.names:
+        raise MissingEntryPointError(f"Entry point '{name}' not found in group '{group}'")
+    # If multiple entry points are found and they have different values we raise, otherwise if they all
+    # correspond to the same value, we simply return one of them
+    if len(found) > 1 and len(set(ep.value for ep in found)) != 1:
+        raise MultipleEntryPointError(
+            f"Multiple entry points '{name}' found in group '{group}': {found}"
+        )
+    return found[name]
+
+
+
+ +
+ + +
+ + + + +

+ get_entry_points(group) + +

+ + +
+ +

Return a list of all the entry points within a specific group

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
group + str + +
+

the entry point group

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

a list of entry points

+
+
+ +
+ Source code in molfeat/plugins/entry_point.py +
74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
def get_entry_points(group: str):
+    """
+    Return a list of all the entry points within a specific group
+
+    Args:
+        group: the entry point group
+
+    Returns:
+        a list of entry points
+    """
+    return eps().select(group=group)
+
+
+
+ +
+ + +
+ + + + +

+ is_registered_entry_point(class_module, class_name, groups=None) + + + cached + + +

+ + +
+ +

Verify whether the class with the given module and class name is a registered entry point.

+
+

Note

+

This function only checks whether the class has a registered entry point. It explicitly does not verify +whether the corresponding class is also importable. Use load_entry_point for that purpose instead.

+
+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
class_module + str + +
+

the module of the class

+
+
+ required +
class_name + str + +
+

the name of the class

+
+
+ required +
groups + Optional[Sequence[str]] + +
+

optionally consider only these entry point groups to look for the class

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ bool + +
+

True if the class is a registered entry point, False otherwise.

+
+
+ +
+ Source code in molfeat/plugins/entry_point.py +
106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
@functools.lru_cache(maxsize=100)
+def is_registered_entry_point(
+    class_module: str, class_name: str, groups: Optional[Sequence[str]] = None
+) -> bool:
+    """Verify whether the class with the given module and class name is a registered entry point.
+
+    !!! note
+        This function only checks whether the class has a registered entry point. It does explicitly not verify
+        if the corresponding class is also importable. Use `load_entry_point` for this purpose instead.
+
+    Args:
+        class_module: the module of the class
+        class_name: the name of the class
+        groups: optionally consider only these entry point groups to look for the class
+
+    Returns:
+        True if the class is a registered entry point, False otherwise.
+    """
+    for group in eps().groups if groups is None else groups:
+        for entry_point in get_entry_points(group):
+            if class_module == entry_point.module and class_name == entry_point.attr:
+                return True
+    return False
+
+
+
+ +
+ + +
+ + + + +

+ load_entry_point(group, name) + +

+ + +
+ +

Load the class registered under the entry point for a given name and group

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
group + str + +
+

the entry point group

+
+
+ required +
name + str + +
+

the name of the entry point

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ Any + +
+

class registered at the given entry point

+
+
+ + + +

Raises:

+ + + + + + + + + + + + + + + + + + + + + +
TypeDescription
+ MissingEntryPointError + +
+

if the entry point was not registered

+
+
+ MultipleEntryPointError + +
+

if the entry point could not be uniquely resolved

+
+
+ LoadingEntryPointError + +
+

if the entry point could not be loaded

+
+
+ +
+ Source code in molfeat/plugins/entry_point.py +
46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
def load_entry_point(group: str, name: str) -> Any:
+    """
+    Load the class registered under the entry point for a given name and group
+
+    Args:
+        group: the entry point group
+        name: the name of the entry point
+
+    Returns:
+        class registered at the given entry point
+
+    Raises:
+        MissingEntryPointError: if the entry point was not registered
+        MultipleEntryPointError: if the entry point could not be uniquely resolved
+        LoadingEntryPointError: if the entry point could not be loaded
+    """
+    entry_point = get_entry_point(group, name)
+
+    try:
+        loaded_entry_point = entry_point.load()
+    except ImportError:
+        raise LoadingEntryPointError(
+            f"Failed to load entry point '{name}':\n{traceback.format_exc()}"
+        )
+
+    return loaded_entry_point
+
+
+
+ +
+ + +
+ + + + +

+ load_registered_plugins(add_submodules=True, groups=None, plugins=None, verbose=True) + +

+ + +
+ +

Load all registered entry points by loading them with the corresponding factory and adding them to the corresponding module attribute.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
add_submodules + bool + +
+

if True, add the loaded entry point to the corresponding module attribute.

+
+
+ True +
groups + Optional[List[str]] + +
+

if provided, only load entry points from the given groups.

+
+
+ None +
plugins + Optional[List[str]] + +
+

if provided, only load entry points or modules/classes that match an entry in the plugins list.

+
+
+ None +
verbose + bool + +
+

if True, log a warning if an entry point cannot be loaded.

+
+
+ True +
+ + + +

Raises:

+ + + + + + + + + + + + + +
TypeDescription
+ EntryPointError + +
+

if any of the registered entry points cannot be loaded. This can happen if: +* The entry point cannot uniquely be resolved +* The resource registered at the entry point cannot be imported +* The resource's type is incompatible with the entry point group that it is defined in.

+
+
+ +
+ Source code in molfeat/plugins/entry_point.py +
146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
def load_registered_plugins(
+    add_submodules: bool = True,
+    groups: Optional[List[str]] = None,
+    plugins: Optional[List[str]] = None,
+    verbose: bool = True,
+):
+    """Load all registered entry points by loading them with the corresponding factory and adding them to the corresponding module attribute.
+
+    Args:
+        add_submodules: if True, add the loaded entry point to the corresponding module attribute.
+        groups: if provided, only load entry points from the given groups.
+        plugins: if provided, only load entry points or modules/classes that matches entry in the plugins list.
+        verbose: if True, log a warning if an entry point cannot be loaded.
+
+    Raises:
+        EntryPointError: if any of the registered entry points cannot be loaded. This can happen if:
+            * The entry point cannot uniquely be resolved
+            * The resource registered at the entry point cannot be imported
+            * The resource's type is incompatible with the entry point group that it is defined in.
+    """
+    for entry_point_group, factory in ENTRY_POINT_GROUP_FACTORYCLASS_MAPPING.items():
+        if groups is not None and entry_point_group not in groups:
+            continue
+        entry_points = get_entry_points(entry_point_group)
+        for entry_point in entry_points:
+            try:
+                loaded_module = factory(entry_point.name, entry_point_group=entry_point_group)
+                if _is_valid_plugin(loaded_module, plugins):
+                    setattr(
+                        sys.modules[entry_point.group],
+                        loaded_module.__name__,
+                        loaded_module,
+                    )
+                    if add_submodules:
+                        if not ismodule(loaded_module):
+                            module_to_add = loaded_module.__module__
+                        else:
+                            module_to_add = loaded_module
+                        sys.modules[f"{entry_point.group}.{entry_point.name}"] = module_to_add
+            except AttributeError as e:
+                if verbose:
+                    logger.warning(
+                        f"Could not load entry point {entry_point.name} from group {entry_point.group}"
+                    )
+                    logger.exception(e)
+
+
+
+ +
+ + +
+ + + + +

+ validate_registered_entry_points() + +

+ + +
+ +

Validate all registered entry points by loading them with the corresponding factory.

+ + + +

Raises:

+ + + + + + + + + + + + + +
TypeDescription
+ EntryPointError + +
+

if any of the registered entry points cannot be loaded. This can happen if: +* The entry point cannot uniquely be resolved +* The resource registered at the entry point cannot be imported +* The resource's type is incompatible with the entry point group that it is defined in.

+
+
+ +
+ Source code in molfeat/plugins/entry_point.py +
131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
def validate_registered_entry_points():
+    """Validate all registered entry points by loading them with the corresponding factory.
+
+    Raises:
+        EntryPointError: if any of the registered entry points cannot be loaded. This can happen if:
+            * The entry point cannot uniquely be resolved
+            * The resource registered at the entry point cannot be imported
+            * The resource's type is incompatible with the entry point group that it is defined in.
+    """
+    for entry_point_group, factory in ENTRY_POINT_GROUP_FACTORYCLASS_MAPPING.items():
+        entry_points = get_entry_points(entry_point_group)
+        for entry_point in entry_points:
+            factory(entry_point.name)
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.store.html b/0.9.7/api/molfeat.store.html new file mode 100644 index 0000000..4302635 --- /dev/null +++ b/0.9.7/api/molfeat.store.html @@ -0,0 +1,4967 @@ + + + + + + + + + + + + + + + + + + + + + + + + + molfeat.store - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

molfeat.store

+ + +
+ + + + +
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ ModelInfo + + +

+ + +
+

+ Bases: BaseModel

+ + +
+ Source code in molfeat/store/modelcard.py +
 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
class ModelInfo(BaseModel):
+    model_config = ConfigDict(
+        protected_namespaces=(
+            "protected_",
+        )  # Prevents warning from usage of model_ prefix in fields
+    )
+
+    name: str
+    inputs: str = "smiles"
+    type: Literal["pretrained", "hand-crafted", "hashed", "count"]
+    version: int = 0
+    group: Optional[str] = "all"
+    submitter: str
+    description: str
+    representation: Literal["graph", "line-notation", "vector", "tensor", "other"]
+    require_3D: Optional[bool] = False
+    tags: Optional[List[str]] = []
+    authors: Optional[List[str]]
+    reference: Optional[str] = None
+    created_at: datetime = Field(default_factory=datetime.now)
+    sha256sum: Optional[str] = None
+    model_usage: Optional[str] = None
+
+    def path(self, root_path: str):
+        """Generate the folder path where to save this model
+
+        Args:
+            root_path: path to the root folder
+        """
+        version = str(self.version or 0)
+        return dm.fs.join(root_path, self.group, self.name, version)
+
+    def match(self, new_card: Union["ModelInfo", dict], match_only: Optional[List[str]] = None):
+        """Compare two model card information and returns True if they are the same
+
+        Args:
+            new_card: card to search for in the modelstore
+            match_only: list of minimum attribute that should match between the two model information
+        """
+
+        self_content = self.model_dump().copy()
+        if not isinstance(new_card, dict):
+            new_card = new_card.model_dump()
+        new_content = new_card.copy()
+        # we always remove the datetime field
+        self_content.pop("created_at", None)
+        new_content.pop("created_at", None)
+        if match_only is not None:
+            self_content = {k: self_content.get(k) for k in match_only}
+            new_content = {k: new_content.get(k) for k in match_only}
+        return self_content == new_content
+
+    def set_usage(self, usage: str):
+        """Set the usage of the model
+
+        Args:
+            usage: usage of the model
+        """
+        self.model_usage = usage
+
+    def usage(self):
+        """Return the usage of the model"""
+        if self.model_usage is not None and self.model_usage:
+            return self.model_usage
+        import_statement, loader_statement = get_model_init(self)
+        comment = "# sanitize and standardize your molecules if needed"
+        if self.require_3D:
+            comment += "\n# <generate 3D coordinates here> "
+        usage = f"""
+        import datamol as dm
+        {import_statement}
+        smiles = dm.freesolv().iloc[:100].smiles
+        {comment}
+        transformer = {loader_statement}
+        features = transformer(smiles)
+        """
+        usage = "\n".join([x.strip() for x in usage.split("\n")])
+        return usage
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ match(new_card, match_only=None) + +

+ + +
+ +

Compare the information of two model cards and return True if they are the same

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
new_card + Union[ModelInfo, dict] + +
+

card to search for in the modelstore

+
+
+ required +
match_only + Optional[List[str]] + +
+

minimal list of attributes that should match between the two model cards

+
+
+ None +
+ +
+ Source code in molfeat/store/modelcard.py +
 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
def match(self, new_card: Union["ModelInfo", dict], match_only: Optional[List[str]] = None):
+    """Compare two model card information and returns True if they are the same
+
+    Args:
+        new_card: card to search for in the modelstore
+        match_only: list of minimum attribute that should match between the two model information
+    """
+
+    self_content = self.model_dump().copy()
+    if not isinstance(new_card, dict):
+        new_card = new_card.model_dump()
+    new_content = new_card.copy()
+    # we always remove the datetime field
+    self_content.pop("created_at", None)
+    new_content.pop("created_at", None)
+    if match_only is not None:
+        self_content = {k: self_content.get(k) for k in match_only}
+        new_content = {k: new_content.get(k) for k in match_only}
+    return self_content == new_content
+
+
+
+ +
+ + +
+ + + + +

+ path(root_path) + +

+ + +
+ +

Generate the folder path where to save this model

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
root_path + str + +
+

path to the root folder

+
+
+ required +
+ +
+ Source code in molfeat/store/modelcard.py +
74
+75
+76
+77
+78
+79
+80
+81
def path(self, root_path: str):
+    """Generate the folder path where to save this model
+
+    Args:
+        root_path: path to the root folder
+    """
+    version = str(self.version or 0)
+    return dm.fs.join(root_path, self.group, self.name, version)
+
+
+
+ +
+ + +
+ + + + +

+ set_usage(usage) + +

+ + +
+ +

Set the usage of the model

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
usage + str + +
+

usage of the model

+
+
+ required +
+ +
+ Source code in molfeat/store/modelcard.py +
103
+104
+105
+106
+107
+108
+109
def set_usage(self, usage: str):
+    """Store a custom usage text in the `model_usage` attribute
+
+    Args:
+        usage: usage text of the model
+    """
+    self.model_usage = usage
+
+
+
+ +
+ + +
+ + + + +

+ usage() + +

+ + +
+ +

Return the usage of the model

+ +
+ Source code in molfeat/store/modelcard.py +
111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
def usage(self):
+    """Return the usage of the model"""
+    if self.model_usage is not None and self.model_usage:
+        return self.model_usage
+    import_statement, loader_statement = get_model_init(self)
+    comment = "# sanitize and standardize your molecules if needed"
+    if self.require_3D:
+        comment += "\n# <generate 3D coordinates here> "
+    usage = f"""
+    import datamol as dm
+    {import_statement}
+    smiles = dm.freesolv().iloc[:100].smiles
+    {comment}
+    transformer = {loader_statement}
+    features = transformer(smiles)
+    """
+    usage = "\n".join([x.strip() for x in usage.split("\n")])
+    return usage
+
+
+
+ +
+ + + +
+ +
+ +
+ + + +
+ + + + +

+ get_model_init(card) + +

+ + +
+ +

Get the model initialization code

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
card + +
+

model card to use

+
+
+ required +
+ +
+ Source code in molfeat/store/modelcard.py +
 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
def get_model_init(card):
+    """Get the model initialization code
+
+    Args:
+        card: model card to use
+    """
+    if card.group == "all" and card.type != "pretrained":
+        import_statement = "from molfeat.trans import MoleculeTransformer"
+        loader_statement = f"MoleculeTransformer(featurizer='{card.name}', dtype=float)"
+    elif card.group in ["rdkit", "fp", "shape"]:
+        import_statement = "from molfeat.trans.fp import FPVecTransformer"
+        loader_statement = f"FPVecTransformer(kind='{card.name}', dtype=float)"
+    elif card.group == "dgllife":
+        import_statement = "from molfeat.trans.pretrained import PretrainedDGLTransformer"
+        loader_statement = f"PretrainedDGLTransformer(kind='{card.name}', dtype=float)"
+    elif card.group == "graphormer":
+        import_statement = "from molfeat.trans.pretrained import GraphormerTransformer"
+        loader_statement = f"GraphormerTransformer(kind='{card.name}', dtype=float)"
+    elif card.group == "fcd":
+        import_statement = "from molfeat.trans.pretrained import FCDTransformer"
+        loader_statement = "FCDTransformer()"
+    elif card.group == "pharmacophore":
+        name = card.name.split("-")[-1]
+        if card.require_3D:
+            import_class = "Pharmacophore3D"
+        else:
+            import_class = "Pharmacophore2D"
+        import_statement = f"from molfeat.trans.base import MoleculeTransformer\nfrom molfeat.calc.pharmacophore import {import_class}"
+        loader_statement = (
+            f"MoleculeTransformer(featurizer={import_class}(factory='{name}'), dtype=float)"
+        )
+    elif card.group == "huggingface":
+        import_statement = (
+            "from molfeat.trans.pretrained.hf_transformers import PretrainedHFTransformer"
+        )
+        loader_statement = (
+            f"PretrainedHFTransformer(kind='{card.name}', notation='{card.inputs}', dtype=float)"
+        )
+    else:
+        raise ValueError(f"Unknown model group {card.group}")
+    return import_statement, loader_statement
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ ModelStore + + +

+ + +
+ + +

A class for artefact serializing from any url

+

This class not only allows serializing and loading pretrained models, +but also helps with listing available models and registering new ones.

+

For simplicity. + * There is no versioning. + * Only one model should match a given name + * Model deletion is not allowed (on the read-only default store) + * Only a single store is supported per model store instance

+
+

Building a New Model Store

+

To create a new model store, you will mainly need a model store bucket path. The default model store bucket, located at gs://molfeat-store-prod/artifacts/, is read-only.

+

To build your own model store bucket, follow the instructions below:

+
    +
  1. Create a local or remote cloud directory that can be accessed by fsspec (and the corresponding filesystem).
  2. +
  3. [Optional] Sync the default model store bucket to your new path if you want to access the default models.
  4. +
  5. Set the environment variable MOLFEAT_MODEL_STORE_BUCKET to your new path. This variable will be used as the default model store bucket when creating a new model store instance without specifying a path. + Note that setting up this path is necessary if you want to access models directly by their names, without manually loading them from your custom model store.
  6. +
+
+ +
+ Source code in molfeat/store/modelstore.py +
 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
class ModelStore:
+    """A class for artefact serializing from any url
+
+    This class not only allow pretrained model serializing and loading,
+    but also help in listing model availability and registering models.
+
+    For simplicity.
+        * There is no versioning.
+        * Only one model should match a given name
+        * Model deletion is not allowed (on the read-only default store)
+        * Only a single store is supported per model store instance
+
+    !!! note "Building a New Model Store"
+        To create a new model store, you will mainly need a model store bucket path. The default model store bucket, located at `gs://molfeat-store-prod/artifacts/`, is **read-only**.
+
+        To build your own model store bucket, follow the instructions below:
+
+        1. Create a local or remote cloud directory that can be accessed by fsspec (and the corresponding filesystem).
+        2. [Optional] Sync the default model store bucket to your new path if you want to access the default models.
+        3. Set the environment variable `MOLFEAT_MODEL_STORE_BUCKET` to your new path. This variable will be used as the default model store bucket when creating a new model store instance without specifying a path.
+            Note that setting up this path is necessary if you want to access models directly by their names, without manually loading them from your custom model store.
+
+
+    """
+
+    # EN: be careful not to recreate ada
+    # EN: should we just use modelstore ?
+    MODEL_STORE_BUCKET = "gs://molfeat-store-prod/artifacts/"
+    MODEL_PATH_NAME = "model.save"
+    METADATA_PATH_NAME = "metadata.json"
+
+    def __init__(self, model_store_bucket: Optional[str] = None):
+        if model_store_bucket is None:
+            model_store_bucket = os.getenv("MOLFEAT_MODEL_STORE_BUCKET", self.MODEL_STORE_BUCKET)
+        self.model_store_bucket = model_store_bucket
+        self._available_models = []
+        self._update_store()
+
+    def _update_store(self):
+        """Initialize the store with all available models"""
+        all_metadata = dm.fs.glob(dm.fs.join(self.model_store_bucket, "**/metadata.json"))
+        self._available_models = []
+        for mtd_file in all_metadata:
+            with fsspec.open(mtd_file, "r") as IN:
+                mtd_content = yaml.safe_load(IN)
+                model_info = ModelInfo(**mtd_content)
+                self._available_models.append(model_info)
+
+    @property
+    def available_models(self):
+        """Return the list of model cards collected by the last `_update_store` call
+
+        The internal list itself is returned, not a copy.
+        """
+        return self._available_models
+
+    def __len__(self):
+        """Return the number of models currently listed in `available_models`"""
+        return len(self.available_models)
+
+    def register(
+        self,
+        modelcard: Union[ModelInfo, dict],
+        model: Optional[Any] = None,
+        chunk_size: int = 2048,
+        save_fn: Optional[Callable] = None,
+        save_fn_kwargs: Optional[dict] = None,
+        force: bool = True,
+    ):
+        """
+        Register a new model to the store
+
+        !!! note `save_fn`
+            You can pass additional kwargs for your `save_fn` through the `save_fn_kwargs` argument.
+            It's expected that `save_fn` will be called as : `save_fn(model, <model_upload_path>, **save_fn_wargs)`,
+            with `<model_upload_path>` being provided by the model store, and that it will return the path to the serialized model.
+            If not provided, `joblib.dump` is used by default.
+
+        Args:
+            modelcard: Model information
+            model: A path to the model artifact or any object that needs to be saved
+            chunk_size: the chunk size for the upload
+            save_fn: any custom function for serializing the model, that takes the model, the upload path and parameters `save_fn_kwargs` as inputs.
+            save_fn_kwargs: any additional kwargs to pass to save_fn
+            force: whether to force upload to the bucket
+
+        """
+        if not isinstance(modelcard, ModelInfo):
+            modelcard = ModelInfo(**modelcard)
+        # we save the model first
+        if self.exists(card=modelcard):
+            logger.warning(f"Model {modelcard.name} exists already ...")
+            if not force:
+                return
+
+        model_root_dir = modelcard.path(self.model_store_bucket)
+        model_path = model
+        model_upload_path = dm.fs.join(model_root_dir, self.MODEL_PATH_NAME)
+        model_metadata_upload_path = dm.fs.join(model_root_dir, self.METADATA_PATH_NAME)
+
+        save_fn_kwargs = save_fn_kwargs or {}
+        if save_fn is None:
+            if not isinstance(model, (pathlib.Path, os.PathLike)):
+                local_model_path = tempfile.NamedTemporaryFile(delete=False)
+                with local_model_path:
+                    joblib.dump(model, local_model_path)
+                model_path = local_model_path.name
+            # Upload the artifact to the bucket
+            dm.fs.copy_file(
+                model_path,
+                model_upload_path,
+                progress=True,
+                leave_progress=False,
+                chunk_size=chunk_size,
+                force=force,
+            )
+        else:
+            model_path = save_fn(model, model_upload_path, **save_fn_kwargs)
+            # we reset to None if the save_fn has not returned anything
+            model_path = model_path or model_upload_path
+        modelcard.sha256sum = commons.sha256sum(model_path)
+        # then we save the metadata as json
+        with fsspec.open(model_metadata_upload_path, "w") as OUT:
+            OUT.write(modelcard.json())
+        self._update_store()
+        logger.info(f"Successfuly registered model {modelcard.name} !")
+
+    def _filelock(self, lock_name: str):
+        """Create an empty lock file under the `_lock_files` folder of the molfeat user cache
+        directory and return a `filelock.FileLock` bound to that file
+
+        Args:
+            lock_name: file name of the lock
+        """
+
+        lock_path = dm.fs.join(
+            str(platformdirs.user_cache_dir("molfeat")), "_lock_files", lock_name
+        )
+        # NOTE(review): the returned mapper is not used; presumably a leftover of the
+        # `touch` attempt mentioned below -- confirm before removing.
+        dm.fs.get_mapper(lock_path)
+        # ensure file is created
+        # out = mapper.fs.touch(lock_path) # does not work  -_-
+        with fsspec.open(lock_path, "w", auto_mkdir=True):
+            pass
+
+        return filelock.FileLock(lock_path)
+
+    def download(
+        self,
+        modelcard: ModelInfo,
+        output_dir: Optional[Union[os.PathLike, pathlib.Path]] = None,
+        chunk_size: int = 2048,
+        force: bool = False,
+    ):
+        """Download an artifact locally
+
+        Args:
+            modelcard: information on the model to download
+            output_dir: path where to save the downloaded artifact. When None, a folder named
+                after the model inside the molfeat user cache directory is used.
+            chunk_size: chunk size to use for download
+            force: whether to force download even if the file exists already
+
+        Returns:
+            the directory in which the metadata (and the model, if any) have been saved
+
+        Raises:
+            ModelStoreError: if the model does not exist in the store, or if the sha256sum of
+                the downloaded artifact differs from the one recorded in the model card
+        """
+
+        remote_dir = modelcard.path(self.model_store_bucket)
+        model_name = modelcard.name
+        if not self.exists(modelcard, check_remote=True):
+            raise ModelStoreError(f"Model {model_name} does not exist in the model store !")
+
+        if output_dir is None:
+            output_dir = dm.fs.join(platformdirs.user_cache_dir("molfeat"), model_name)
+
+        dm.fs.mkdir(output_dir, exist_ok=True)
+
+        model_remote_path = dm.fs.join(remote_dir, self.MODEL_PATH_NAME)
+        model_dest_path = dm.fs.join(output_dir, self.MODEL_PATH_NAME)
+        metadata_remote_path = dm.fs.join(remote_dir, self.METADATA_PATH_NAME)
+        metadata_dest_path = dm.fs.join(output_dir, self.METADATA_PATH_NAME)
+
+        # avoid downloading if the file exists already: the local copy is considered up to date
+        # when the metadata is present and the model artifact is present locally exactly when
+        # it is present remotely (a card may come without any model artifact)
+        if (
+            not (
+                dm.fs.exists(metadata_dest_path)
+                and (dm.fs.exists(model_dest_path) == dm.fs.exists(model_remote_path))
+            )
+            or force
+        ):
+            # metadata should exists if the model exists
+            with self._filelock(f"{model_name}.metadata.json.lock"):
+                dm.fs.copy_file(
+                    metadata_remote_path,
+                    metadata_dest_path,
+                    progress=True,
+                    leave_progress=False,
+                    force=True,
+                )
+
+            if dm.fs.exists(model_remote_path):
+                with self._filelock(f"{model_name}.lock"):
+                    if dm.fs.is_dir(model_remote_path):
+                        # we copy the model dir
+                        dm.fs.copy_dir(
+                            model_remote_path,
+                            model_dest_path,
+                            progress=True,
+                            leave_progress=False,
+                            chunk_size=chunk_size,
+                            force=force,
+                        )
+                    else:
+                        # we copy the single model file
+                        dm.fs.copy_file(
+                            model_remote_path,
+                            model_dest_path,
+                            progress=True,
+                            leave_progress=False,
+                            chunk_size=chunk_size,
+                            force=force,
+                        )
+
+        # integrity check: a card without a recorded sha256sum is accepted as is
+        # NOTE(review): this runs even when no model artifact was copied -- presumably
+        # `commons.sha256sum` copes with a missing path; confirm.
+        cache_sha256sum = commons.sha256sum(model_dest_path)
+        if modelcard.sha256sum is not None and cache_sha256sum != modelcard.sha256sum:
+            # remove the whole output folder so a corrupted artifact is never reused
+            mapper = dm.fs.get_mapper(output_dir)
+            mapper.fs.delete(output_dir, recursive=True)
+            raise ModelStoreError(
+                f"""The destination artifact at {model_dest_path} has a different sha256sum ({cache_sha256sum}) """
+                f"""than the Remote artifact sha256sum ({modelcard.sha256sum}). The destination artifact has been removed !"""
+            )
+
+        return output_dir
+
+    def load(
+        self,
+        model_name: Union[str, dict, ModelInfo],
+        load_fn: Optional[Callable] = None,
+        load_fn_kwargs: Optional[dict] = None,
+        download_output_dir: Optional[Union[os.PathLike, pathlib.Path]] = None,
+        chunk_size: int = 2048,
+        force: bool = False,
+    ):
+        """
+        Load a model by its name
+
+        Args:
+            model_name: name of the model to load
+            load_fn: Custom loading function to load the model
+            load_fn_kwargs: Optional dict of additional kwargs to provide to the loading function
+            download_output_dir: Argument for download function to specify the download folder
+            chunk_size: chunk size for download
+            force: whether to reforce the download of the file
+
+        Returns:
+            model: Optional model, if the model requires download or loading weights
+            model_info: model information card
+        """
+        if isinstance(model_name, str):
+            # find the model with the same name
+            modelcard = self.search(name=model_name)[0]
+        else:
+            modelcard = model_name
+        output_dir = self.download(
+            modelcard=modelcard,
+            output_dir=download_output_dir,
+            chunk_size=chunk_size,
+            force=force,
+        )
+        if load_fn is None:
+            load_fn = joblib.load
+        model_path = dm.fs.join(output_dir, self.MODEL_PATH_NAME)
+        metadata_path = dm.fs.join(output_dir, self.METADATA_PATH_NAME)
+
+        # deal with non-pretrained models that might not have a serialized file
+        model = None
+        load_fn_kwargs = load_fn_kwargs or {}
+        if dm.fs.exists(model_path):
+            model = load_fn(model_path, **load_fn_kwargs)
+        with fsspec.open(metadata_path, "r") as IN:
+            model_info_dict = yaml.safe_load(IN)
+        model_info = ModelInfo(**model_info_dict)
+        return model, model_info
+
+    def __contains__(self, card: Optional[ModelInfo] = None):
+        return self.exists(card)
+
+    def exists(
+        self,
+        card: Optional[ModelInfo] = None,
+        check_remote: bool = False,
+        **match_params,
+    ) -> bool:
+        """Returns True if a model is registered in the store
+
+        Args:
+            card: card of the model to check
+            check_remote: whether to check if the remote path of the model exists
+            match_params: parameters for matching as expected by `ModelInfo.match`
+        """
+
+        found = False
+        for model_info in self.available_models:
+            if model_info.match(card, **match_params):
+                found = True
+                break
+        return found and (not check_remote or dm.fs.exists(card.path(self.model_store_bucket)))
+
+    def search(self, modelcard: Optional[ModelInfo] = None, **search_kwargs):
+        """ "Return all model card that match the required search parameters
+
+        Args:
+            modelcard: model card to search for
+            search_kwargs: search parameters to use
+        """
+        search_infos = {}
+        found = []
+        if modelcard is not None:
+            search_infos = modelcard.dict().copy()
+        search_infos.update(search_kwargs)
+        for model in self.available_models:
+            if model.match(search_infos, match_only=list(search_infos.keys())):
+                found.append(model)
+        return found
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ available_models + + + property + + +

+ + +
+ +

Return a list of all models that have been serialized in molfeat

+
+ +
+ + + + +
+ + + + +

+ __len__() + +

+ + +
+ +

Return the length of the model store

+ +
+ Source code in molfeat/store/modelstore.py +
78
+79
+80
def __len__(self):
+    """Return the length of the model store"""
+    return len(self.available_models)
+
+
+
+ +
+ + +
+ + + + +

+ download(modelcard, output_dir=None, chunk_size=2048, force=False) + +

+ + +
+ +

Download an artifact locally

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
modelcard + ModelInfo + +
+

information on the model to download

+
+
+ required +
output_dir + Optional[Union[PathLike, Path]] + +
+

path where to save the downloaded artifact

+
+
+ None +
chunk_size + int + +
+

chunk size to use for download

+
+
+ 2048 +
force + bool + +
+

whether to force download even if the file exists already

+
+
+ False +
+ +
+ Source code in molfeat/store/modelstore.py +
163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
def download(
+    self,
+    modelcard: ModelInfo,
+    output_dir: Optional[Union[os.PathLike, pathlib.Path]] = None,
+    chunk_size: int = 2048,
+    force: bool = False,
+):
+    """Download an artifact locally
+
+    Args:
+        modelcard: information on the model to download
+        output_dir: path where to save the downloaded artifact
+        chunk_size: chunk size to use for download
+        force: whether to force download even if the file exists already
+    """
+
+    remote_dir = modelcard.path(self.model_store_bucket)
+    model_name = modelcard.name
+    if not self.exists(modelcard, check_remote=True):
+        raise ModelStoreError(f"Model {model_name} does not exist in the model store !")
+
+    if output_dir is None:
+        output_dir = dm.fs.join(platformdirs.user_cache_dir("molfeat"), model_name)
+
+    dm.fs.mkdir(output_dir, exist_ok=True)
+
+    model_remote_path = dm.fs.join(remote_dir, self.MODEL_PATH_NAME)
+    model_dest_path = dm.fs.join(output_dir, self.MODEL_PATH_NAME)
+    metadata_remote_path = dm.fs.join(remote_dir, self.METADATA_PATH_NAME)
+    metadata_dest_path = dm.fs.join(output_dir, self.METADATA_PATH_NAME)
+
+    # avoid downloading if the file exists already
+    if (
+        not (
+            dm.fs.exists(metadata_dest_path)
+            and (dm.fs.exists(model_dest_path) == dm.fs.exists(model_remote_path))
+        )
+        or force
+    ):
+        # metadata should exists if the model exists
+        with self._filelock(f"{model_name}.metadata.json.lock"):
+            dm.fs.copy_file(
+                metadata_remote_path,
+                metadata_dest_path,
+                progress=True,
+                leave_progress=False,
+                force=True,
+            )
+
+        if dm.fs.exists(model_remote_path):
+            with self._filelock(f"{model_name}.lock"):
+                if dm.fs.is_dir(model_remote_path):
+                    # we copy the model dir
+                    dm.fs.copy_dir(
+                        model_remote_path,
+                        model_dest_path,
+                        progress=True,
+                        leave_progress=False,
+                        chunk_size=chunk_size,
+                        force=force,
+                    )
+                else:
+                    # we copy the model dir
+                    dm.fs.copy_file(
+                        model_remote_path,
+                        model_dest_path,
+                        progress=True,
+                        leave_progress=False,
+                        chunk_size=chunk_size,
+                        force=force,
+                    )
+
+    cache_sha256sum = commons.sha256sum(model_dest_path)
+    if modelcard.sha256sum is not None and cache_sha256sum != modelcard.sha256sum:
+        mapper = dm.fs.get_mapper(output_dir)
+        mapper.fs.delete(output_dir, recursive=True)
+        raise ModelStoreError(
+            f"""The destination artifact at {model_dest_path} has a different sha256sum ({cache_sha256sum}) """
+            f"""than the Remote artifact sha256sum ({modelcard.sha256sum}). The destination artifact has been removed !"""
+        )
+
+    return output_dir
+
+
+
+ +
+ + +
+ + + + +

+ exists(card=None, check_remote=False, **match_params) + +

+ + +
+ +

Returns True if a model is registered in the store

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
card + Optional[ModelInfo] + +
+

card of the model to check

+
+
+ None +
check_remote + bool + +
+

whether to check if the remote path of the model exists

+
+
+ False +
match_params + +
+

parameters for matching as expected by ModelInfo.match

+
+
+ {} +
+ +
+ Source code in molfeat/store/modelstore.py +
299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
def exists(
+    self,
+    card: Optional[ModelInfo] = None,
+    check_remote: bool = False,
+    **match_params,
+) -> bool:
+    """Returns True if a model is registered in the store
+
+    Args:
+        card: card of the model to check
+        check_remote: whether to check if the remote path of the model exists
+        match_params: parameters for matching as expected by `ModelInfo.match`
+    """
+
+    found = False
+    for model_info in self.available_models:
+        if model_info.match(card, **match_params):
+            found = True
+            break
+    return found and (not check_remote or dm.fs.exists(card.path(self.model_store_bucket)))
+
+
+
+ +
+ + +
+ + + + +

+ load(model_name, load_fn=None, load_fn_kwargs=None, download_output_dir=None, chunk_size=2048, force=False) + +

+ + +
+ +

Load a model by its name

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
model_name + Union[str, dict, ModelInfo] + +
+

name of the model to load

+
+
+ required +
load_fn + Optional[Callable] + +
+

Custom loading function to load the model

+
+
+ None +
load_fn_kwargs + Optional[dict] + +
+

Optional dict of additional kwargs to provide to the loading function

+
+
+ None +
download_output_dir + Optional[Union[PathLike, Path]] + +
+

Argument for download function to specify the download folder

+
+
+ None +
chunk_size + int + +
+

chunk size for download

+
+
+ 2048 +
force + bool + +
+

whether to reforce the download of the file

+
+
+ False +
+ + + +

Returns:

+ + + + + + + + + + + + + + + + + +
Name TypeDescription
model + +
+

Optional model, if the model requires download or loading weights

+
+
model_info + +
+

model information card

+
+
+ +
+ Source code in molfeat/store/modelstore.py +
246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
def load(
+    self,
+    model_name: Union[str, dict, ModelInfo],
+    load_fn: Optional[Callable] = None,
+    load_fn_kwargs: Optional[dict] = None,
+    download_output_dir: Optional[Union[os.PathLike, pathlib.Path]] = None,
+    chunk_size: int = 2048,
+    force: bool = False,
+):
+    """
+    Load a model by its name
+
+    Args:
+        model_name: name of the model to load
+        load_fn: Custom loading function to load the model
+        load_fn_kwargs: Optional dict of additional kwargs to provide to the loading function
+        download_output_dir: Argument for download function to specify the download folder
+        chunk_size: chunk size for download
+        force: whether to reforce the download of the file
+
+    Returns:
+        model: Optional model, if the model requires download or loading weights
+        model_info: model information card
+    """
+    if isinstance(model_name, str):
+        # find the model with the same name
+        modelcard = self.search(name=model_name)[0]
+    else:
+        modelcard = model_name
+    output_dir = self.download(
+        modelcard=modelcard,
+        output_dir=download_output_dir,
+        chunk_size=chunk_size,
+        force=force,
+    )
+    if load_fn is None:
+        load_fn = joblib.load
+    model_path = dm.fs.join(output_dir, self.MODEL_PATH_NAME)
+    metadata_path = dm.fs.join(output_dir, self.METADATA_PATH_NAME)
+
+    # deal with non-pretrained models that might not have a serialized file
+    model = None
+    load_fn_kwargs = load_fn_kwargs or {}
+    if dm.fs.exists(model_path):
+        model = load_fn(model_path, **load_fn_kwargs)
+    with fsspec.open(metadata_path, "r") as IN:
+        model_info_dict = yaml.safe_load(IN)
+    model_info = ModelInfo(**model_info_dict)
+    return model, model_info
+
+
+
+ +
+ + +
+ + + + +

+ register(modelcard, model=None, chunk_size=2048, save_fn=None, save_fn_kwargs=None, force=True) + +

+ + +
+ +

Register a new model to the store

+

!!! note save_fn + You can pass additional kwargs for your save_fn through the save_fn_kwargs argument. + It's expected that save_fn will be called as : save_fn(model, <model_upload_path>, **save_fn_wargs), + with <model_upload_path> being provided by the model store, and that it will return the path to the serialized model. + If not provided, joblib.dump is used by default.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
modelcard + Union[ModelInfo, dict] + +
+

Model information

+
+
+ required +
model + Optional[Any] + +
+

A path to the model artifact or any object that needs to be saved

+
+
+ None +
chunk_size + int + +
+

the chunk size for the upload

+
+
+ 2048 +
save_fn + Optional[Callable] + +
+

any custom function for serializing the model, that takes the model, the upload path and parameters save_fn_kwargs as inputs.

+
+
+ None +
save_fn_kwargs + Optional[dict] + +
+

any additional kwargs to pass to save_fn

+
+
+ None +
force + bool + +
+

whether to force upload to the bucket

+
+
+ True +
+ +
+ Source code in molfeat/store/modelstore.py +
 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
def register(
+    self,
+    modelcard: Union[ModelInfo, dict],
+    model: Optional[Any] = None,
+    chunk_size: int = 2048,
+    save_fn: Optional[Callable] = None,
+    save_fn_kwargs: Optional[dict] = None,
+    force: bool = True,
+):
+    """
+    Register a new model to the store
+
+    !!! note `save_fn`
+        You can pass additional kwargs for your `save_fn` through the `save_fn_kwargs` argument.
+        It's expected that `save_fn` will be called as : `save_fn(model, <model_upload_path>, **save_fn_wargs)`,
+        with `<model_upload_path>` being provided by the model store, and that it will return the path to the serialized model.
+        If not provided, `joblib.dump` is used by default.
+
+    Args:
+        modelcard: Model information
+        model: A path to the model artifact or any object that needs to be saved
+        chunk_size: the chunk size for the upload
+        save_fn: any custom function for serializing the model, that takes the model, the upload path and parameters `save_fn_kwargs` as inputs.
+        save_fn_kwargs: any additional kwargs to pass to save_fn
+        force: whether to force upload to the bucket
+
+    """
+    if not isinstance(modelcard, ModelInfo):
+        modelcard = ModelInfo(**modelcard)
+    # we save the model first
+    if self.exists(card=modelcard):
+        logger.warning(f"Model {modelcard.name} exists already ...")
+        if not force:
+            return
+
+    model_root_dir = modelcard.path(self.model_store_bucket)
+    model_path = model
+    model_upload_path = dm.fs.join(model_root_dir, self.MODEL_PATH_NAME)
+    model_metadata_upload_path = dm.fs.join(model_root_dir, self.METADATA_PATH_NAME)
+
+    save_fn_kwargs = save_fn_kwargs or {}
+    if save_fn is None:
+        if not isinstance(model, (pathlib.Path, os.PathLike)):
+            local_model_path = tempfile.NamedTemporaryFile(delete=False)
+            with local_model_path:
+                joblib.dump(model, local_model_path)
+            model_path = local_model_path.name
+        # Upload the artifact to the bucket
+        dm.fs.copy_file(
+            model_path,
+            model_upload_path,
+            progress=True,
+            leave_progress=False,
+            chunk_size=chunk_size,
+            force=force,
+        )
+    else:
+        model_path = save_fn(model, model_upload_path, **save_fn_kwargs)
+        # we reset to None if the save_fn has not returned anything
+        model_path = model_path or model_upload_path
+    modelcard.sha256sum = commons.sha256sum(model_path)
+    # then we save the metadata as json
+    with fsspec.open(model_metadata_upload_path, "w") as OUT:
+        OUT.write(modelcard.json())
+    self._update_store()
+    logger.info(f"Successfuly registered model {modelcard.name} !")
+
+
+
+ +
+ + +
+ + + + +

+ search(modelcard=None, **search_kwargs) + +

+ + +
+ +

"Return all model card that match the required search parameters

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
modelcard + Optional[ModelInfo] + +
+

model card to search for

+
+
+ None +
search_kwargs + +
+

search parameters to use

+
+
+ {} +
+ +
+ Source code in molfeat/store/modelstore.py +
320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
def search(self, modelcard: Optional[ModelInfo] = None, **search_kwargs):
+    """ "Return all model card that match the required search parameters
+
+    Args:
+        modelcard: model card to search for
+        search_kwargs: search parameters to use
+    """
+    search_infos = {}
+    found = []
+    if modelcard is not None:
+        search_infos = modelcard.dict().copy()
+    search_infos.update(search_kwargs)
+    for model in self.available_models:
+        if model.match(search_infos, match_only=list(search_infos.keys())):
+            found.append(model)
+    return found
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +
+ +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ PretrainedModel + + +

+ + +
+

+ Bases: ABC

+ + +

Base class for loading pretrained models

+ +
+ Source code in molfeat/store/loader.py +
11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
class PretrainedModel(abc.ABC):
+    """Base class for loading pretrained models"""
+
+    @classmethod
+    def _artifact_load(cls, name: str, download_path: Optional[os.PathLike] = None, **kwargs):
+        """Load an artifact based on its name
+
+        Args:
+            name: name of the model to load
+            download_path: path to a directory where to save the downloaded files
+        """
+        ...
+
+    @classmethod
+    def _load_or_raise(
+        cls,
+        name: str,
+        download_path: Optional[os.PathLike] = None,
+        **kwargs,
+    ):
+        """Load model or raise an exception
+
+        Args:
+            name: name of the model to load
+            download_path: local download path of the model
+
+        """
+        ...
+
+    @abc.abstractmethod
+    def load(self):
+        """Load the model"""
+        ...
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ load() + + + abstractmethod + + +

+ + +
+ +

Load the model

+ +
+ Source code in molfeat/store/loader.py +
40
+41
+42
+43
@abc.abstractmethod
+def load(self):
+    """Load the model"""
+    ...
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ PretrainedStoreModel + + +

+ + +
+

+ Bases: PretrainedModel

+ + +

Class for loading pretrained models from the model zoo

+ +
+ Source code in molfeat/store/loader.py +
 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
class PretrainedStoreModel(PretrainedModel):
+    r"""
+    Class for loading pretrained models from the model zoo
+    """
+
+    def __init__(
+        self,
+        name: str,
+        cache_path: Optional[os.PathLike] = None,
+        store: Optional[ModelStore] = None,
+    ):
+        """Interface for pretrained model from the default modelstore
+
+        Args:
+            name: name of the pretrained transformer in the model store
+            cache_path: optional local cache path.
+            store: ModelStore to use for loading the pretrained model
+        """
+        self.name = name
+        self.cache_path = cache_path
+        if store is None:
+            store = ModelStore()
+        self.store = store
+
+    @classmethod
+    def _artifact_load(cls, name: str, download_path: Optional[os.PathLike] = None, **kwargs):
+        """Load internal artefact from the model store
+
+        Args:
+            name: name of the model to load
+            download_path: path to a directory where to save the downloaded files
+        """
+
+        if not dm.fs.exists(download_path):
+            cls._load_or_raise.cache_clear()
+        return cls._load_or_raise(name, download_path, **kwargs)
+
+    @classmethod
+    @lru_cache(maxsize=100)
+    def _load_or_raise(
+        cls,
+        name: str,
+        download_path: Optional[os.PathLike] = None,
+        store: Optional[ModelStore] = None,
+        **kwargs,
+    ):
+        """Load when from ada or raise exception
+        Args:
+            name: name
+        """
+        if store is None:
+            store = ModelStore()
+        try:
+            modelcard = store.search(name=name)[0]
+            artifact_dir = store.download(modelcard, download_path, **kwargs)
+        except Exception:
+            mess = f"Can't retrieve model {name} from the store !"
+            raise ModelStoreError(mess)
+        return artifact_dir
+
+    def load(self):
+        """Load the model"""
+        raise NotImplementedError
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __init__(name, cache_path=None, store=None) + +

+ + +
+ +

Interface for pretrained model from the default modelstore

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
name + str + +
+

name of the pretrained transformer in the model store

+
+
+ required +
cache_path + Optional[PathLike] + +
+

optional local cache path.

+
+
+ None +
store + Optional[ModelStore] + +
+

ModelStore to use for loading the pretrained model

+
+
+ None +
+ +
+ Source code in molfeat/store/loader.py +
51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
def __init__(
+    self,
+    name: str,
+    cache_path: Optional[os.PathLike] = None,
+    store: Optional[ModelStore] = None,
+):
+    """Interface for pretrained model from the default modelstore
+
+    Args:
+        name: name of the pretrained transformer in the model store
+        cache_path: optional local cache path.
+        store: ModelStore to use for loading the pretrained model
+    """
+    self.name = name
+    self.cache_path = cache_path
+    if store is None:
+        store = ModelStore()
+    self.store = store
+
+
+
+ +
+ + +
+ + + + +

+ load() + +

+ + +
+ +

Load the model

+ +
+ Source code in molfeat/store/loader.py +
106
+107
+108
def load(self):
+    """Load the model"""
+    raise NotImplementedError
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.trans.base.html b/0.9.7/api/molfeat.trans.base.html new file mode 100644 index 0000000..e37c0db --- /dev/null +++ b/0.9.7/api/molfeat.trans.base.html @@ -0,0 +1,6109 @@ + + + + + + + + + + + + + + + + + + + + + + + + + molfeat.trans.base - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

molfeat.trans.base

+ +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ BaseFeaturizer + + +

+ + +
+

+ Bases: BaseEstimator

+ + +

Molecule featurizer base class that needs to be implemented by all featurizers. +This featurizer is compatible with scikit-learn estimators and thus can be plugged into a pipeline

+ +
+ Source code in molfeat/trans/base.py +
 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
class BaseFeaturizer(BaseEstimator):
+    """
+    Molecule featurizer base class that needs to be implemented by all featurizers.
+    This featurizer is compatible with scikit-learn estimators and thus can be plugged into a pipeline
+    """
+
+    def __init__(
+        self,
+        n_jobs: int = 1,
+        verbose: bool = True,
+        dtype: Optional[Union[str, Callable]] = None,
+        parallel_kwargs: Optional[Dict[str, Any]] = None,
+        **params,
+    ):
+        self._n_jobs = n_jobs
+        self.dtype = dtype
+        self.verbose = verbose
+        self.parallel_kwargs = parallel_kwargs or {}
+        for k, v in params.items():
+            setattr(self, k, v)
+        self._input_params = dict(n_jobs=n_jobs, dtype=dtype, verbose=verbose, **params)
+
+    @property
+    def n_jobs(self):
+        """Get the number of concurrent jobs to run with this featurizer"""
+        return self._n_jobs
+
+    @n_jobs.setter
+    def n_jobs(self, val):
+        if val >= 1:
+            self._n_jobs = val
+        elif val == -1:
+            self._n_jobs = joblib.cpu_count()
+
+    def _get_param_names(self):
+        """Get parameter names for the estimator"""
+        return self._input_params.keys()
+
+    def _update_params(self):
+        """Update parameters of the current estimator"""
+        ...
+
+    def set_params(self, **params):
+        """Set the parameters of this estimator.
+
+        Returns:
+            self: estimator instance
+        """
+        super().set_params(**params)
+        for k, v in params.items():
+            if k in self._input_params:
+                self._input_params[k] = v
+        self._update_params()
+        return self
+
+    def copy(self):
+        """Return a copy of this object."""
+        copy_obj = self.__class__(**self._input_params)
+        for k, v in self.__dict__.items():
+            if not hasattr(copy_obj, k):
+                setattr(copy_obj, k, copy.deepcopy(v))
+        return copy_obj
+
+    def preprocess(self, inputs: list, labels: Optional[list] = None):
+        """Preprocess input
+
+        Args:
+            inputs: inputs to preprocess
+            labels: labels to preprocess (optional)
+
+        Returns:
+            processed: pre-processed input list
+        """
+        return inputs, labels
+
+    def get_collate_fn(self, *args, **kwargs):
+        """
+        Get collate function of this featurizer. In the implementation of this function
+        you should set the relevant attributes or argument of the underlying collate function
+        (e.g via functools.partial) and return the function itself
+
+        Returns:
+            fn: Collate function for pytorch or None
+        """
+        return None
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ n_jobs + + + property + writable + + +

+ + +
+ +

Get the number of concurrent jobs to run with this featurizer

+
+ +
+ + + + +
+ + + + +

+ copy() + +

+ + +
+ +

Return a copy of this object.

+ +
+ Source code in molfeat/trans/base.py +
116
+117
+118
+119
+120
+121
+122
def copy(self):
+    """Return a copy of this object."""
+    copy_obj = self.__class__(**self._input_params)
+    for k, v in self.__dict__.items():
+        if not hasattr(copy_obj, k):
+            setattr(copy_obj, k, copy.deepcopy(v))
+    return copy_obj
+
+
+
+ +
+ + +
+ + + + +

+ get_collate_fn(*args, **kwargs) + +

+ + +
+ +

Get collate function of this featurizer. In the implementation of this function +you should set the relevant attributes or argument of the underlying collate function +(e.g via functools.partial) and return the function itself

+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
fn + +
+

Collate function for pytorch or None

+
+
+ +
+ Source code in molfeat/trans/base.py +
136
+137
+138
+139
+140
+141
+142
+143
+144
+145
def get_collate_fn(self, *args, **kwargs):
+    """
+    Get collate function of this featurizer. In the implementation of this function
+    you should set the relevant attributes or argument of the underlying collate function
+    (e.g via functools.partial) and return the function itself
+
+    Returns:
+        fn: Collate function for pytorch or None
+    """
+    return None
+
+
+
+ +
+ + +
+ + + + +

+ preprocess(inputs, labels=None) + +

+ + +
+ +

Preprocess input

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
inputs + list + +
+

inputs to preprocess

+
+
+ required +
labels + Optional[list] + +
+

labels to preprocess (optional)

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
processed + +
+

pre-processed input list

+
+
+ +
+ Source code in molfeat/trans/base.py +
124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
def preprocess(self, inputs: list, labels: Optional[list] = None):
+    """Preprocess input
+
+    Args:
+        inputs: inputs to preprocess
+        labels: labels to preprocess (optional)
+
+    Returns:
+        processed: pre-processed input list
+    """
+    return inputs, labels
+
+
+
+ +
+ + +
+ + + + +

+ set_params(**params) + +

+ + +
+ +

Set the parameters of this estimator.

+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
self + +
+

estimator instance

+
+
+ +
+ Source code in molfeat/trans/base.py +
103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
def set_params(self, **params):
+    """Set the parameters of this estimator.
+
+    Returns:
+        self: estimator instance
+    """
+    super().set_params(**params)
+    for k, v in params.items():
+        if k in self._input_params:
+            self._input_params[k] = v
+    self._update_params()
+    return self
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ MoleculeTransformer + + +

+ + +
+

+ Bases: TransformerMixin, BaseFeaturizer

+ + +

Base class for molecular data transformers such as Fingerprinter etc. +If you create a subclass of this featurizer, you will need to make sure that the +input arguments of the init are kept as is in the object attributes.

+
+

Note

+

The transformer supports a variety of datatypes; they are only enforced when passing the +enforce_dtype=True argument to __call__. For pandas dataframes, use 'pandas'|'df'|'dataframe'|pd.DataFrame

+
+
+Using a custom Calculator +

You can use your own calculator for featurization. It's recommended to subclass molfeat.calc.base.SerializableCalculator. +If your calculator also implements a batch_compute method, it will be used for batch featurization and parallelization options will be passed to it.

+
+ +
+ Source code in molfeat/trans/base.py +
148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
+449
+450
+451
+452
+453
+454
+455
+456
+457
+458
+459
+460
+461
+462
+463
+464
+465
+466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
+480
+481
+482
+483
+484
+485
+486
+487
+488
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498
+499
+500
+501
+502
+503
+504
+505
+506
+507
+508
+509
+510
+511
+512
+513
+514
+515
+516
+517
+518
+519
+520
+521
+522
+523
+524
+525
+526
+527
+528
+529
+530
+531
+532
+533
+534
+535
+536
+537
+538
+539
+540
+541
+542
+543
+544
+545
+546
+547
+548
+549
+550
+551
+552
+553
+554
+555
+556
+557
+558
+559
+560
+561
+562
+563
+564
+565
+566
+567
+568
+569
+570
+571
+572
+573
+574
+575
+576
+577
+578
+579
+580
+581
+582
+583
+584
+585
+586
+587
+588
+589
+590
+591
+592
+593
+594
+595
+596
+597
+598
+599
+600
+601
+602
+603
+604
+605
+606
+607
+608
+609
+610
+611
+612
+613
+614
+615
+616
+617
+618
+619
+620
+621
+622
+623
+624
+625
+626
+627
+628
+629
+630
+631
+632
+633
+634
+635
+636
+637
+638
+639
+640
+641
+642
+643
+644
+645
+646
+647
+648
+649
+650
+651
+652
+653
+654
+655
+656
+657
+658
+659
+660
+661
+662
+663
+664
+665
+666
+667
+668
+669
+670
+671
+672
+673
+674
+675
+676
+677
+678
+679
+680
+681
+682
+683
+684
+685
+686
+687
+688
+689
+690
+691
+692
+693
+694
+695
+696
+697
+698
+699
+700
class MoleculeTransformer(TransformerMixin, BaseFeaturizer, metaclass=_TransformerMeta):
+
+    """
+    Base class for molecular data transformer such as Fingerprinter etc.
+    If you create a subclass of this featurizer, you will need to make sure that the
+    input argument of the init are kept as is in the object attributes.
+
+    !!! note
+        The transformer supports a variety of datatype, they are only enforced when passing the
+        `enforce_dtype=True` attributes in `__call__`. For pandas dataframes, use `'pandas'|'df'|'dataframe'|pd.DataFrame`
+
+    ???+ tip "Using a custom Calculator"
+        You can use your own calculator for featurization. It's recommended to subclass `molfeat.calc.base.SerializableCalculator`
+        If you calculator also implements a `batch_compute` method, it will be used for batch featurization and parallelization options will be passed to it.
+    """
+
+    def __init__(
+        self,
+        featurizer: Union[str, Callable],
+        n_jobs: int = 1,
+        verbose: bool = False,
+        dtype: Optional[Union[str, Callable]] = None,
+        parallel_kwargs: Optional[Dict[str, Any]] = None,
+        **params,
+    ):
+        """Mol transformer base class
+
+        Args:
+            featurizer: featurizer to use
+            n_jobs (int, optional): Number of job to run in parallel. Defaults to 1.
+            verbose (bool, optional): Verbosity level. Defaults to True.
+            dtype (callable, optional): Output data type. Defaults to None, where numpy arrays are returned.
+            parallel_kwargs (dict, optional): Optional kwargs to pass to the dm.parallelized function. Defaults to None.
+
+        """
+        super().__init__(
+            n_jobs=n_jobs,
+            verbose=verbose,
+            dtype=dtype,
+            featurizer=featurizer,
+            parallel_kwargs=parallel_kwargs,
+            **params,
+        )
+        if callable(featurizer):
+            self.featurizer = featurizer
+        else:
+            self.featurizer = get_calculator(featurizer, **params)
+
+        self.cols_to_keep = None
+        self._fitted = False
+
+        self._save_input_args()
+        if self.featurizer and not (
+            isinstance(self.featurizer, str) or is_callable(self.featurizer)
+        ):
+            raise AttributeError(f"Featurizer {self.featurizer} must be a callable or a string")
+
+    def _save_input_args(self):
+        """Save the input arguments of a transformer to the attribute
+        `_input_args` of the object.
+        """
+
+        # NOTE(hadim): don't override existing _input_args so
+        # it's possible to use MoleculeTransformer as a featurizer
+        # instead of simply a base class.
+        if not hasattr(self, "_input_args"):
+            self._input_args = get_input_args()
+
+    def _update_params(self):
+        if not callable(self.featurizer):
+            params = copy.deepcopy(self._input_params)
+            params.pop("featurizer")
+            self.featurizer = get_calculator(self.featurizer, **params)
+        self._fitted = False
+
+    def __setstate__(self, state):
+        state.pop("callbacks", None)
+        self.__dict__.update(state)
+        self.__dict__["parallel_kwargs"] = state.get("parallel_kwargs", {})
+        self._update_params()
+
+    def fit(self, X: List[Union[dm.Mol, str]], y: Optional[list] = None, **fit_params):
+        """Fit the current transformer on given dataset.
+
+        The goal of fitting is for example to identify nan columns values
+        that needs to be removed from the dataset
+
+        Args:
+            X: input list of molecules
+            y (list, optional): Optional list of molecular properties. Defaults to None.
+
+        Returns:
+            self: MolTransformer instance after fitting
+        """
+        feats = self.transform(X, ignore_errors=True)
+        lengths = [len(x) for x in feats if not datatype.is_null(x)]
+        if lengths:
+            # we will ignore all nan
+            feats = datatype.to_numpy([f for f in feats if not datatype.is_null(f)])
+            self.cols_to_keep = (~np.any(np.isnan(feats), axis=0)).nonzero()[0]
+        self._fitted = True
+        return self
+
+    def _transform(self, mol: dm.Mol):
+        r"""
+        Compute features for a single molecule.
+        This method would potentially need to be reimplemented by child classes
+
+        Args:
+            mol (dm.Mol): molecule to transform into features
+
+        Returns
+            feat: featurized input molecule
+
+        """
+        feat = None
+        try:
+            feat = datatype.to_numpy(self.featurizer(mol))
+            if self.cols_to_keep is not None:
+                feat = feat[self.cols_to_keep]
+        except Exception as e:
+            if self.verbose:
+                logger.error(e)
+        return feat
+
+    def transform(
+        self,
+        mols: List[Union[dm.Mol, str]],
+        ignore_errors: bool = False,
+        **kwargs,
+    ):
+        r"""
+        Compute the features for a set of molecules.
+
+        !!! note
+            Note that depending on the `ignore_errors` argument, all failed
+            featurization (caused whether by invalid smiles or error during
+            data transformation) will be substitued by None features for the
+            corresponding molecule. This is done, so you can find the positions
+            of these molecules and filter them out according to your own logic.
+
+        Args:
+            mols: a list containing smiles or mol objects
+            ignore_errors (bool, optional): Whether to silently ignore errors
+
+
+        Returns:
+            features: a list of features for each molecule in the input set
+        """
+        # Convert single mol to iterable format
+        if isinstance(mols, pd.DataFrame):
+            mols = mols[mols.columns[0]]
+        if isinstance(mols, (str, dm.Mol)) or not isinstance(mols, Iterable):
+            mols = [mols]
+
+        def _to_mol(x):
+            return dm.to_mol(x) if x else None
+
+        parallel_kwargs = getattr(self, "parallel_kwargs", {})
+
+        if hasattr(self.featurizer, "batch_compute") and callable(self.featurizer.batch_compute):
+            # this calculator can be batched which will be faster
+            features = self.featurizer.batch_compute(mols, n_jobs=self.n_jobs, **parallel_kwargs)
+        else:
+            mols = dm.parallelized(_to_mol, mols, n_jobs=self.n_jobs, **parallel_kwargs)
+            if self.n_jobs not in [0, 1]:
+                # use a proxy model to run in parallel
+                cpy = self.copy()
+                features = dm.parallelized(
+                    cpy._transform,
+                    mols,
+                    n_jobs=self.n_jobs,
+                    **cpy.parallel_kwargs,
+                )
+            else:
+                features = [self._transform(mol) for mol in mols]
+        if not ignore_errors:
+            for ind, feat in enumerate(features):
+                if feat is None:
+                    raise ValueError(
+                        f"Cannot transform molecule at index {ind}. Please check logs (set verbose to True) to see errors!"
+                    )
+
+        # sklearn feature validation for sklearn pipeline
+        return datatype.as_numpy_array_if_possible(features, self.dtype)
+
+    def __len__(self):
+        """Compute featurizer length"""
+
+        # check length and _length attribute
+        cols_to_keep = getattr(self, "cols_to_keep", None)
+        cur_length = None
+
+        if cols_to_keep is not None:
+            cur_length = len(cols_to_keep)
+        else:
+            cur_length = getattr(self, "length", getattr(self, "_length", None))
+            # then check the featurizer length if it's a callable and not a string/None
+            if (
+                cur_length is None
+                and callable(self.featurizer)
+                and hasattr(self.featurizer, "__len__")
+            ):
+                cur_length = len(self.featurizer)
+
+        if cur_length is None:
+            raise ValueError(
+                f"Cannot auto-determine length of this MolTransformer: {self.__class__.__name__}"
+            )
+
+        return cur_length
+
+    def __call__(
+        self,
+        mols: List[Union[dm.Mol, str]],
+        enforce_dtype: bool = True,
+        ignore_errors: bool = False,
+        **kwargs,
+    ):
+        r"""
+        Calculate features for molecules. Using __call__, instead of transform.
+        If ignore_error is True, a list of features and valid ids are returned.
+        Note that most Transfomers allow you to specify
+        a return datatype.
+
+        Args:
+            mols:  Mol or SMILES of the molecules to be transformed
+            enforce_dtype: whether to enforce the instance dtype in the generated fingerprint
+            ignore_errors: Whether to ignore errors during featurization or raise an error.
+            kwargs: Named parameters for the transform method
+
+        Returns:
+            feats: list of valid features
+            ids: all valid molecule positions that did not failed during featurization.
+                Only returned when ignore_errors is True.
+
+        """
+        features = self.transform(mols, ignore_errors=ignore_errors, enforce_dtype=False, **kwargs)
+        ids = np.arange(len(features))
+        if ignore_errors:
+            features, ids = self._filter_none(features)
+        if self.dtype is not None and enforce_dtype:
+            features = datatype.cast(features, dtype=self.dtype, columns=self.columns)
+        if not ignore_errors:
+            return features
+        return features, ids
+
+    @staticmethod
+    def _filter_none(features):
+        ids_bad = []
+        # If the features are a list, filter the None ids
+        if isinstance(features, (tuple, list, np.ndarray)):
+            for f_id, feat in enumerate(features):
+                if datatype.is_null(feat):
+                    ids_bad.append(f_id)
+            ids_to_keep = [
+                this_id for this_id in np.arange(0, len(features)) if this_id not in ids_bad
+            ]
+            features = [features[ii] for ii in ids_to_keep]
+
+        # If the features are a dict or DataFrame, filter the ids when any key id is None
+        elif isinstance(features, (dict, pd.DataFrame)):
+            if isinstance(features, dict):
+                features = pd.DataFrame(features)
+            for feat_col in features.columns:
+                for f_id, feat in enumerate(features[feat_col].values.flatten()):
+                    if feat is None:
+                        ids_bad.append(f_id)
+            ids_bad = np.unique(ids_bad).tolist()
+            all_ids = np.arange(0, features.shape[0])
+            ids_to_keep = [this_id for this_id in all_ids if this_id not in ids_bad]
+            features = features.iloc[ids_to_keep, :]
+
+        else:
+            ids_to_keep = np.arange(0, features.shape[0])
+        return features, list(ids_to_keep)
+
+    @property
+    def columns(self):
+        """Get the list of columns for this molecular descriptor
+
+        Returns:
+            columns (list): Name of the columns of the descriptor
+        """
+        columns = getattr(self.featurizer, "columns", None)
+        cols_to_keep = getattr(self, "cols_to_keep", None)
+        if columns is not None and cols_to_keep is not None and len(cols_to_keep) > 0:
+            columns = [columns[i] for i in cols_to_keep]
+        return columns
+
+    @staticmethod
+    def batch_transform(
+        transformer: Callable,
+        mols: List[Union[dm.Mol, str]],
+        batch_size: int = 256,
+        n_jobs: Optional[int] = None,
+        concatenate: bool = True,
+        progress: bool = True,
+        leave_progress: bool = False,
+        **parallel_kwargs,
+    ):
+        """
+        Batched computation of featurization of a list of molecules
+
+        Args:
+            transformer: Fingerprint transformer
+            mols: List of molecules to featurize
+            batch_size: Batch size
+            n_jobs: number of jobs to run in parallel
+            concatenate: Whether to concatenate the results or return the list of batched results
+            progress: whether to show progress bar
+            leave_progress: whether to leave progress bar after completion
+            parallel_kwargs: additional arguments to pass to dm.parallelized
+
+        Returns:
+            List of batches
+        """
+
+        step_size = int(np.ceil(len(mols) / batch_size))
+        batched_mols = np.array_split(mols, step_size)
+
+        tqdm_kwargs = parallel_kwargs.setdefault("tqdm_kwargs", {})
+        tqdm_kwargs.update(leave=leave_progress, desc="Batch compute:")
+        parallel_kwargs["tqdm_kwargs"] = tqdm_kwargs
+
+        # it's recommended to use a precomputed molecule transformer
+        # instead of the internal cache for pretrained models
+        cache_attr = "cache"
+        existing_cache = getattr(transformer, cache_attr, None)
+        if existing_cache is None:
+            cache_attr = "precompute_cache"
+            existing_cache = getattr(transformer, cache_attr, None)
+
+        use_mp_cache = (
+            existing_cache is not None
+            and not isinstance(existing_cache, MPDataCache)
+            and n_jobs not in [None, 0, 1]  # this is based on datamol sequential vs parallel
+        )
+        if use_mp_cache:
+            # we need to change the cache system to one that works with multiprocessing
+            # to have a shared memory
+            new_cache = MPDataCache()
+            new_cache.update(existing_cache)
+            setattr(transformer, cache_attr, new_cache)
+
+        transformed = dm.parallelized(
+            transformer,
+            batched_mols,
+            n_jobs=n_jobs,
+            progress=progress,
+            **parallel_kwargs,
+        )
+        if use_mp_cache:
+            # we set back the original transformation while updating it with
+            # all the missing values
+            existing_cache.update(getattr(transformer, cache_attr, {}))
+            setattr(transformer, cache_attr, existing_cache)
+
+        if concatenate:
+            # if we ask for concatenation, then we would need to fix None values ideally
+            fixed_transformations = []
+            for computed_trans in transformed:
+                if computed_trans is None:
+                    computed_trans = np.full(len(computed_trans), len(transformer), np.nan)
+                else:
+                    for i, x in enumerate(computed_trans):
+                        if x is None:
+                            computed_trans[i] = np.full(len(transformer), np.nan)
+                fixed_transformations.append(computed_trans)
+            return np.concatenate(fixed_transformations)
+        return transformed
+
+    # Featurizer to state methods
+
+    def to_state_dict(self) -> dict:
+        """Serialize the featurizer to a state dict."""
+
+        if getattr(self, "_input_args") is None:
+            raise ValueError(f"Cannot save state for this transformer '{self.__class__.__name__}'")
+
+        # Process the input arguments before building the state
+        args = copy.deepcopy(self._input_args)
+
+        # Deal with dtype
+        if "dtype" in args and not isinstance(args["dtype"], str):
+            args["dtype"] = map_dtype(args["dtype"])
+
+        ## Deal with graph atom/bond featurizers
+        # NOTE(hadim): it's important to highlight that atom/bond featurizers can't be
+        # customized with this logic.
+        if args.get("atom_featurizer") is not None:
+            if hasattr(args.get("atom_featurizer"), "to_state_dict"):
+                args["atom_featurizer"] = args["atom_featurizer"].to_state_dict()
+                args["_atom_featurizer_is_pickled"] = False
+            else:
+                logger.warning
+                (
+                    "You are attempting to pickle an atom featurizer without a `to_state_dict` function into a hex string"
+                )
+                args["atom_featurizer"] = fn_to_hex(args["atom_featurizer"])
+                args["_atom_featurizer_is_pickled"] = True
+
+        # deal with bond featurizer
+        if args.get("bond_featurizer") is not None:
+            if hasattr(args.get("bond_featurizer"), "to_state_dict"):
+                args["bond_featurizer"] = args["bond_featurizer"].to_state_dict()
+                args["_bond_featurizer_is_pickled"] = False
+            else:
+                logger.warning(
+                    "You are attempting to pickle a bond featurizer without a `to_state_dict` function into a hex string"
+                )
+                args["bond_featurizer"] = fn_to_hex(args["bond_featurizer"])
+                args["_bond_featurizer_is_pickled"] = True
+
+        ## Deal with custom featurizer
+        if "featurizer" in args and isinstance(args["featurizer"], Callable):
+            if hasattr(args["featurizer"], "to_state_dict"):
+                args["featurizer"] = args["featurizer"].to_state_dict()
+                args["_featurizer_is_pickled"] = False
+            else:
+                logger.warning(
+                    "You are attempting to pickle a callable without a `to_state_dict` function into a hex string"
+                )
+                args["featurizer"] = fn_to_hex(args["featurizer"])
+                args["_featurizer_is_pickled"] = True
+
+        # Build the state
+        state = {}
+        state["name"] = self.__class__.__name__
+        state["args"] = args
+        state["_molfeat_version"] = MOLFEAT_VERSION
+        return state
+
+    def to_state_json(self) -> str:
+        return json.dumps(self.to_state_dict())
+
+    def to_state_yaml(self) -> str:
+        return yaml.dump(self.to_state_dict(), Dumper=yaml.SafeDumper)
+
+    def to_state_json_file(self, filepath: Union[str, Path]):
+        with fsspec.open(filepath, "w") as f:
+            f.write(self.to_state_json())  # type: ignore
+
+    def to_state_yaml_file(self, filepath: Union[str, Path]):
+        with fsspec.open(filepath, "w") as f:
+            f.write(self.to_state_yaml())  # type: ignore
+
+    # State to featurizer methods
+
+    @staticmethod
+    def from_state_dict(state: dict, override_args: Optional[dict] = None) -> "MoleculeTransformer":
+        """Reload a featurizer from a state dict."""
+
+        # Don't alter the original state dict
+        state = copy.deepcopy(state)
+
+        # MoleculeTransformer is a special case that has his own logic
+        if state["name"] == "PrecomputedMolTransformer":
+            return PrecomputedMolTransformer.from_state_dict(
+                state=state,
+                override_args=override_args,
+            )
+
+        # Get the name
+        transformer_class = _TRANSFORMERS.get(state["name"])
+        if transformer_class is None:
+            raise ValueError(f"The featurizer '{state['name']}' is not supported.")
+        if isinstance(transformer_class, str):
+            # Get the transformer class from its path
+            transformer_class = import_from_string(transformer_class)
+
+        # Process the state as needed
+        args = state.get("args", {})
+
+        # Deal with dtype
+        if "dtype" in args and isinstance(args["dtype"], str):
+            args["dtype"] = map_dtype(args["dtype"])
+
+        ## Deal with graph atom/bond featurizers
+        if args.get("atom_featurizer") is not None:
+            if not args.get("_atom_featurizer_is_pickled"):
+                klass_name = args["atom_featurizer"].get("name")
+                args["atom_featurizer"] = ATOM_FEATURIZER_MAPPING_REVERSE[
+                    klass_name
+                ].from_state_dict(args["atom_featurizer"])
+            else:
+                # buffer = io.BytesIO(bytes.fromhex(args["atom_featurizer"]))
+                # args["atom_featurizer"] = joblib.load(buffer)
+                args["atom_featurizer"] = hex_to_fn(args["atom_featurizer"])
+            args.pop("_atom_featurizer_is_pickled", None)
+        if args.get("bond_featurizer") is not None:
+            if not args.get("_bond_featurizer_is_pickled"):
+                klass_name = args["bond_featurizer"].get("name")
+                args["bond_featurizer"] = BOND_FEATURIZER_MAPPING_REVERSE[
+                    klass_name
+                ].from_state_dict(args["bond_featurizer"])
+            else:
+                args["bond_featurizer"] = hex_to_fn(args["bond_featurizer"])
+            args.pop("_bond_featurizer_is_pickled", None)
+        ## Deal with custom featurizer
+        if "featurizer" in args:
+            if args.get("_featurizer_is_pickled") is True:
+                args["featurizer"] = hex_to_fn(args["featurizer"])
+                args.pop("_featurizer_is_pickled")
+            elif (
+                isinstance(args["featurizer"], Mapping)
+                and args["featurizer"].get("name") in _CALCULATORS
+            ):
+                # we have found a known calculator
+                klass_name = args["featurizer"].get("name")
+                args["featurizer"] = _CALCULATORS[klass_name].from_state_dict(args["featurizer"])
+                args.pop("_featurizer_is_pickled")
+
+        if override_args is not None:
+            args.update(override_args)
+
+        # Create the transformer
+        featurizer = transformer_class(**args)
+        return featurizer
+
+    @staticmethod
+    def from_state_json(
+        state_json: str,
+        override_args: Optional[dict] = None,
+    ) -> "MoleculeTransformer":
+        state_dict = json.loads(state_json)
+        return MoleculeTransformer.from_state_dict(state_dict, override_args=override_args)
+
+    @staticmethod
+    def from_state_yaml(
+        state_yaml: str,
+        override_args: Optional[dict] = None,
+    ) -> "MoleculeTransformer":
+        state_dict = yaml.load(state_yaml, Loader=yaml.SafeLoader)
+        return MoleculeTransformer.from_state_dict(state_dict, override_args=override_args)
+
+    @staticmethod
+    def from_state_json_file(
+        filepath: Union[str, Path],
+        override_args: Optional[dict] = None,
+    ) -> "MoleculeTransformer":
+        with fsspec.open(filepath, "r") as f:
+            featurizer = MoleculeTransformer.from_state_json(f.read(), override_args=override_args)  # type: ignore
+        return featurizer
+
+    @staticmethod
+    def from_state_yaml_file(
+        filepath: Union[str, Path],
+        override_args: Optional[dict] = None,
+    ) -> "MoleculeTransformer":
+        with fsspec.open(filepath, "r") as f:
+            featurizer = MoleculeTransformer.from_state_yaml(f.read(), override_args=override_args)  # type: ignore
+        return featurizer
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ columns + + + property + + +

+ + +
+ +

Get the list of columns for this molecular descriptor

+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
columns + list + +
+

Name of the columns of the descriptor

+
+
+
+ +
+ + + + +
+ + + + +

+ __call__(mols, enforce_dtype=True, ignore_errors=False, **kwargs) + +

+ + +
+ +

Calculate features for molecules. Using __call__, instead of transform. +If ignore_errors is True, a list of features and valid ids are returned. +Note that most Transformers allow you to specify +a return datatype.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + List[Union[Mol, str]] + +
+

Mol or SMILES of the molecules to be transformed

+
+
+ required +
enforce_dtype + bool + +
+

whether to enforce the instance dtype in the generated fingerprint

+
+
+ True +
ignore_errors + bool + +
+

Whether to ignore errors during featurization or raise an error.

+
+
+ False +
kwargs + +
+

Named parameters for the transform method

+
+
+ {} +
+ + + +

Returns:

+ + + + + + + + + + + + + + + + + +
Name TypeDescription
feats + +
+

list of valid features

+
+
ids + +
+

all valid molecule positions that did not fail during featurization. +Only returned when ignore_errors is True.

+
+
+ +
+ Source code in molfeat/trans/base.py +
360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
def __call__(
+    self,
+    mols: List[Union[dm.Mol, str]],
+    enforce_dtype: bool = True,
+    ignore_errors: bool = False,
+    **kwargs,
+):
+    r"""
+    Calculate features for molecules. Using __call__, instead of transform.
+    If ignore_error is True, a list of features and valid ids are returned.
+    Note that most Transfomers allow you to specify
+    a return datatype.
+
+    Args:
+        mols:  Mol or SMILES of the molecules to be transformed
+        enforce_dtype: whether to enforce the instance dtype in the generated fingerprint
+        ignore_errors: Whether to ignore errors during featurization or raise an error.
+        kwargs: Named parameters for the transform method
+
+    Returns:
+        feats: list of valid features
+        ids: all valid molecule positions that did not failed during featurization.
+            Only returned when ignore_errors is True.
+
+    """
+    features = self.transform(mols, ignore_errors=ignore_errors, enforce_dtype=False, **kwargs)
+    ids = np.arange(len(features))
+    if ignore_errors:
+        features, ids = self._filter_none(features)
+    if self.dtype is not None and enforce_dtype:
+        features = datatype.cast(features, dtype=self.dtype, columns=self.columns)
+    if not ignore_errors:
+        return features
+    return features, ids
+
+
+
+ +
+ + +
+ + + + +

+ __init__(featurizer, n_jobs=1, verbose=False, dtype=None, parallel_kwargs=None, **params) + +

+ + +
+ +

Mol transformer base class

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
featurizer + Union[str, Callable] + +
+

featurizer to use

+
+
+ required +
n_jobs + int + +
+

Number of jobs to run in parallel. Defaults to 1.

+
+
+ 1 +
verbose + bool + +
+

Verbosity level. Defaults to False.

+
+
+ False +
dtype + callable + +
+

Output data type. Defaults to None, where numpy arrays are returned.

+
+
+ None +
parallel_kwargs + dict + +
+

Optional kwargs to pass to the dm.parallelized function. Defaults to None.

+
+
+ None +
+ +
+ Source code in molfeat/trans/base.py +
164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
def __init__(
+    self,
+    featurizer: Union[str, Callable],
+    n_jobs: int = 1,
+    verbose: bool = False,
+    dtype: Optional[Union[str, Callable]] = None,
+    parallel_kwargs: Optional[Dict[str, Any]] = None,
+    **params,
+):
+    """Mol transformer base class
+
+    Args:
+        featurizer: featurizer to use
+        n_jobs (int, optional): Number of jobs to run in parallel. Defaults to 1.
+        verbose (bool, optional): Verbosity level. Defaults to False.
+        dtype (callable, optional): Output data type. Defaults to None, where numpy arrays are returned.
+        parallel_kwargs (dict, optional): Optional kwargs to pass to the dm.parallelized function. Defaults to None.
+
+    """
+    super().__init__(
+        n_jobs=n_jobs,
+        verbose=verbose,
+        dtype=dtype,
+        featurizer=featurizer,
+        parallel_kwargs=parallel_kwargs,
+        **params,
+    )
+    if callable(featurizer):
+        self.featurizer = featurizer
+    else:
+        self.featurizer = get_calculator(featurizer, **params)
+
+    self.cols_to_keep = None
+    self._fitted = False
+
+    self._save_input_args()
+    if self.featurizer and not (
+        isinstance(self.featurizer, str) or is_callable(self.featurizer)
+    ):
+        raise AttributeError(f"Featurizer {self.featurizer} must be a callable or a string")
+
+
+
+ +
+ + +
+ + + + +

+ __len__() + +

+ + +
+ +

Compute featurizer length

+ +
+ Source code in molfeat/trans/base.py +
334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
def __len__(self):
+    """Compute featurizer length"""
+
+    # check length and _length attribute
+    cols_to_keep = getattr(self, "cols_to_keep", None)
+    cur_length = None
+
+    if cols_to_keep is not None:
+        cur_length = len(cols_to_keep)
+    else:
+        cur_length = getattr(self, "length", getattr(self, "_length", None))
+        # then check the featurizer length if it's a callable and not a string/None
+        if (
+            cur_length is None
+            and callable(self.featurizer)
+            and hasattr(self.featurizer, "__len__")
+        ):
+            cur_length = len(self.featurizer)
+
+    if cur_length is None:
+        raise ValueError(
+            f"Cannot auto-determine length of this MolTransformer: {self.__class__.__name__}"
+        )
+
+    return cur_length
+
+
+
+ +
+ + +
+ + + + +

+ batch_transform(transformer, mols, batch_size=256, n_jobs=None, concatenate=True, progress=True, leave_progress=False, **parallel_kwargs) + + + staticmethod + + +

+ + +
+ +

Batched computation of featurization of a list of molecules

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
transformer + Callable + +
+

Fingerprint transformer

+
+
+ required +
mols + List[Union[Mol, str]] + +
+

List of molecules to featurize

+
+
+ required +
batch_size + int + +
+

Batch size

+
+
+ 256 +
n_jobs + Optional[int] + +
+

number of jobs to run in parallel

+
+
+ None +
concatenate + bool + +
+

Whether to concatenate the results or return the list of batched results

+
+
+ True +
progress + bool + +
+

whether to show progress bar

+
+
+ True +
leave_progress + bool + +
+

whether to leave progress bar after completion

+
+
+ False +
parallel_kwargs + +
+

additional arguments to pass to dm.parallelized

+
+
+ {} +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

Concatenated array of features if concatenate is True, otherwise the list of batched results

+
+
+ +
+ Source code in molfeat/trans/base.py +
438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
+449
+450
+451
+452
+453
+454
+455
+456
+457
+458
+459
+460
+461
+462
+463
+464
+465
+466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
+480
+481
+482
+483
+484
+485
+486
+487
+488
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498
+499
+500
+501
+502
+503
+504
+505
+506
+507
+508
+509
+510
+511
+512
+513
+514
+515
+516
+517
+518
@staticmethod
+def batch_transform(
+    transformer: Callable,
+    mols: List[Union[dm.Mol, str]],
+    batch_size: int = 256,
+    n_jobs: Optional[int] = None,
+    concatenate: bool = True,
+    progress: bool = True,
+    leave_progress: bool = False,
+    **parallel_kwargs,
+):
+    """
+    Batched computation of featurization of a list of molecules
+
+    Args:
+        transformer: Fingerprint transformer
+        mols: List of molecules to featurize
+        batch_size: Batch size
+        n_jobs: number of jobs to run in parallel
+        concatenate: Whether to concatenate the results or return the list of batched results
+        progress: whether to show progress bar
+        leave_progress: whether to leave progress bar after completion
+        parallel_kwargs: additional arguments to pass to dm.parallelized
+
+    Returns:
+        Concatenated array of features if `concatenate` is True, otherwise the list of batched results
+    """
+
+    step_size = int(np.ceil(len(mols) / batch_size))
+    batched_mols = np.array_split(mols, step_size)
+
+    tqdm_kwargs = parallel_kwargs.setdefault("tqdm_kwargs", {})
+    tqdm_kwargs.update(leave=leave_progress, desc="Batch compute:")
+    parallel_kwargs["tqdm_kwargs"] = tqdm_kwargs
+
+    # it's recommended to use a precomputed molecule transformer
+    # instead of the internal cache for pretrained models
+    cache_attr = "cache"
+    existing_cache = getattr(transformer, cache_attr, None)
+    if existing_cache is None:
+        cache_attr = "precompute_cache"
+        existing_cache = getattr(transformer, cache_attr, None)
+
+    use_mp_cache = (
+        existing_cache is not None
+        and not isinstance(existing_cache, MPDataCache)
+        and n_jobs not in [None, 0, 1]  # this is based on datamol sequential vs parallel
+    )
+    if use_mp_cache:
+        # we need to change the cache system to one that works with multiprocessing
+        # to have a shared memory
+        new_cache = MPDataCache()
+        new_cache.update(existing_cache)
+        setattr(transformer, cache_attr, new_cache)
+
+    transformed = dm.parallelized(
+        transformer,
+        batched_mols,
+        n_jobs=n_jobs,
+        progress=progress,
+        **parallel_kwargs,
+    )
+    if use_mp_cache:
+        # we set back the original transformation while updating it with
+        # all the missing values
+        existing_cache.update(getattr(transformer, cache_attr, {}))
+        setattr(transformer, cache_attr, existing_cache)
+
+    if concatenate:
+        # if we ask for concatenation, then we would need to fix None values ideally
+        fixed_transformations = []
+        for computed_trans in transformed:
+            if computed_trans is None:
+                computed_trans = np.full(len(computed_trans), len(transformer), np.nan)
+            else:
+                for i, x in enumerate(computed_trans):
+                    if x is None:
+                        computed_trans[i] = np.full(len(transformer), np.nan)
+            fixed_transformations.append(computed_trans)
+        return np.concatenate(fixed_transformations)
+    return transformed
+
+
+
+ +
+ + +
+ + + + +

+ fit(X, y=None, **fit_params) + +

+ + +
+ +

Fit the current transformer on given dataset.

+

The goal of fitting is, for example, to identify columns containing NaN values +that need to be removed from the dataset

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
X + List[Union[Mol, str]] + +
+

input list of molecules

+
+
+ required +
y + list + +
+

Optional list of molecular properties. Defaults to None.

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
self + +
+

MolTransformer instance after fitting

+
+
+ +
+ Source code in molfeat/trans/base.py +
229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
def fit(self, X: List[Union[dm.Mol, str]], y: Optional[list] = None, **fit_params):
+    """Fit the current transformer on given dataset.
+
+    The goal of fitting is, for example, to identify columns containing NaN values
+    that need to be removed from the dataset
+
+    Args:
+        X: input list of molecules
+        y (list, optional): Optional list of molecular properties. Defaults to None.
+
+    Returns:
+        self: MolTransformer instance after fitting
+    """
+    feats = self.transform(X, ignore_errors=True)
+    lengths = [len(x) for x in feats if not datatype.is_null(x)]
+    if lengths:
+        # we will ignore all nan
+        feats = datatype.to_numpy([f for f in feats if not datatype.is_null(f)])
+        self.cols_to_keep = (~np.any(np.isnan(feats), axis=0)).nonzero()[0]
+    self._fitted = True
+    return self
+
+
+
+ +
+ + +
+ + + + +

+ from_state_dict(state, override_args=None) + + + staticmethod + + +

+ + +
+ +

Reload a featurizer from a state dict.

+ +
+ Source code in molfeat/trans/base.py +
597
+598
+599
+600
+601
+602
+603
+604
+605
+606
+607
+608
+609
+610
+611
+612
+613
+614
+615
+616
+617
+618
+619
+620
+621
+622
+623
+624
+625
+626
+627
+628
+629
+630
+631
+632
+633
+634
+635
+636
+637
+638
+639
+640
+641
+642
+643
+644
+645
+646
+647
+648
+649
+650
+651
+652
+653
+654
+655
+656
+657
+658
+659
+660
+661
+662
+663
+664
+665
+666
@staticmethod
+def from_state_dict(state: dict, override_args: Optional[dict] = None) -> "MoleculeTransformer":
+    """Reload a featurizer from a state dict."""
+
+    # Don't alter the original state dict
+    state = copy.deepcopy(state)
+
+    # PrecomputedMolTransformer is a special case that has its own logic
+    if state["name"] == "PrecomputedMolTransformer":
+        return PrecomputedMolTransformer.from_state_dict(
+            state=state,
+            override_args=override_args,
+        )
+
+    # Get the name
+    transformer_class = _TRANSFORMERS.get(state["name"])
+    if transformer_class is None:
+        raise ValueError(f"The featurizer '{state['name']}' is not supported.")
+    if isinstance(transformer_class, str):
+        # Get the transformer class from its path
+        transformer_class = import_from_string(transformer_class)
+
+    # Process the state as needed
+    args = state.get("args", {})
+
+    # Deal with dtype
+    if "dtype" in args and isinstance(args["dtype"], str):
+        args["dtype"] = map_dtype(args["dtype"])
+
+    ## Deal with graph atom/bond featurizers
+    if args.get("atom_featurizer") is not None:
+        if not args.get("_atom_featurizer_is_pickled"):
+            klass_name = args["atom_featurizer"].get("name")
+            args["atom_featurizer"] = ATOM_FEATURIZER_MAPPING_REVERSE[
+                klass_name
+            ].from_state_dict(args["atom_featurizer"])
+        else:
+            # buffer = io.BytesIO(bytes.fromhex(args["atom_featurizer"]))
+            # args["atom_featurizer"] = joblib.load(buffer)
+            args["atom_featurizer"] = hex_to_fn(args["atom_featurizer"])
+        args.pop("_atom_featurizer_is_pickled", None)
+    if args.get("bond_featurizer") is not None:
+        if not args.get("_bond_featurizer_is_pickled"):
+            klass_name = args["bond_featurizer"].get("name")
+            args["bond_featurizer"] = BOND_FEATURIZER_MAPPING_REVERSE[
+                klass_name
+            ].from_state_dict(args["bond_featurizer"])
+        else:
+            args["bond_featurizer"] = hex_to_fn(args["bond_featurizer"])
+        args.pop("_bond_featurizer_is_pickled", None)
+    ## Deal with custom featurizer
+    if "featurizer" in args:
+        if args.get("_featurizer_is_pickled") is True:
+            args["featurizer"] = hex_to_fn(args["featurizer"])
+            args.pop("_featurizer_is_pickled")
+        elif (
+            isinstance(args["featurizer"], Mapping)
+            and args["featurizer"].get("name") in _CALCULATORS
+        ):
+            # we have found a known calculator
+            klass_name = args["featurizer"].get("name")
+            args["featurizer"] = _CALCULATORS[klass_name].from_state_dict(args["featurizer"])
+            args.pop("_featurizer_is_pickled")
+
+    if override_args is not None:
+        args.update(override_args)
+
+    # Create the transformer
+    featurizer = transformer_class(**args)
+    return featurizer
+
+
+
+ +
+ + +
+ + + + +

+ to_state_dict() + +

+ + +
+ +

Serialize the featurizer to a state dict.

+ +
+ Source code in molfeat/trans/base.py +
522
+523
+524
+525
+526
+527
+528
+529
+530
+531
+532
+533
+534
+535
+536
+537
+538
+539
+540
+541
+542
+543
+544
+545
+546
+547
+548
+549
+550
+551
+552
+553
+554
+555
+556
+557
+558
+559
+560
+561
+562
+563
+564
+565
+566
+567
+568
+569
+570
+571
+572
+573
+574
+575
+576
+577
+578
+579
def to_state_dict(self) -> dict:
+    """Serialize the featurizer to a state dict."""
+
+    if getattr(self, "_input_args") is None:
+        raise ValueError(f"Cannot save state for this transformer '{self.__class__.__name__}'")
+
+    # Process the input arguments before building the state
+    args = copy.deepcopy(self._input_args)
+
+    # Deal with dtype
+    if "dtype" in args and not isinstance(args["dtype"], str):
+        args["dtype"] = map_dtype(args["dtype"])
+
+    ## Deal with graph atom/bond featurizers
+    # NOTE(hadim): it's important to highlight that atom/bond featurizers can't be
+    # customized with this logic.
+    if args.get("atom_featurizer") is not None:
+        if hasattr(args.get("atom_featurizer"), "to_state_dict"):
+            args["atom_featurizer"] = args["atom_featurizer"].to_state_dict()
+            args["_atom_featurizer_is_pickled"] = False
+        else:
+            logger.warning
+            (
+                "You are attempting to pickle an atom featurizer without a `to_state_dict` function into a hex string"
+            )
+            args["atom_featurizer"] = fn_to_hex(args["atom_featurizer"])
+            args["_atom_featurizer_is_pickled"] = True
+
+    # deal with bond featurizer
+    if args.get("bond_featurizer") is not None:
+        if hasattr(args.get("bond_featurizer"), "to_state_dict"):
+            args["bond_featurizer"] = args["bond_featurizer"].to_state_dict()
+            args["_bond_featurizer_is_pickled"] = False
+        else:
+            logger.warning(
+                "You are attempting to pickle a bond featurizer without a `to_state_dict` function into a hex string"
+            )
+            args["bond_featurizer"] = fn_to_hex(args["bond_featurizer"])
+            args["_bond_featurizer_is_pickled"] = True
+
+    ## Deal with custom featurizer
+    if "featurizer" in args and isinstance(args["featurizer"], Callable):
+        if hasattr(args["featurizer"], "to_state_dict"):
+            args["featurizer"] = args["featurizer"].to_state_dict()
+            args["_featurizer_is_pickled"] = False
+        else:
+            logger.warning(
+                "You are attempting to pickle a callable without a `to_state_dict` function into a hex string"
+            )
+            args["featurizer"] = fn_to_hex(args["featurizer"])
+            args["_featurizer_is_pickled"] = True
+
+    # Build the state
+    state = {}
+    state["name"] = self.__class__.__name__
+    state["args"] = args
+    state["_molfeat_version"] = MOLFEAT_VERSION
+    return state
+
+
+
+ +
+ + +
+ + + + +

+ transform(mols, ignore_errors=False, **kwargs) + +

+ + +
+ +

Compute the features for a set of molecules.

+
+

Note

+

Note that, depending on the ignore_errors argument, every failed +featurization (caused either by an invalid SMILES or by an error during +data transformation) will be substituted by a None feature for the +corresponding molecule. This is done so that you can find the positions +of these molecules and filter them out according to your own logic.

+
+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + List[Union[Mol, str]] + +
+

a list containing smiles or mol objects

+
+
+ required +
ignore_errors + bool + +
+

Whether to silently ignore errors

+
+
+ False +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
features + +
+

a list of features for each molecule in the input set

+
+
+ +
+ Source code in molfeat/trans/base.py +
273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
def transform(
+    self,
+    mols: List[Union[dm.Mol, str]],
+    ignore_errors: bool = False,
+    **kwargs,
+):
+    r"""
+    Compute the features for a set of molecules.
+
+    !!! note
+        Note that, depending on the `ignore_errors` argument, every failed
+        featurization (caused either by an invalid SMILES or by an error during
+        data transformation) will be substituted by a None feature for the
+        corresponding molecule. This is done so that you can find the positions
+        of these molecules and filter them out according to your own logic.
+
+    Args:
+        mols: a list containing smiles or mol objects
+        ignore_errors (bool, optional): Whether to silently ignore errors
+
+
+    Returns:
+        features: a list of features for each molecule in the input set
+    """
+    # Convert single mol to iterable format
+    if isinstance(mols, pd.DataFrame):
+        mols = mols[mols.columns[0]]
+    if isinstance(mols, (str, dm.Mol)) or not isinstance(mols, Iterable):
+        mols = [mols]
+
+    def _to_mol(x):
+        return dm.to_mol(x) if x else None
+
+    parallel_kwargs = getattr(self, "parallel_kwargs", {})
+
+    if hasattr(self.featurizer, "batch_compute") and callable(self.featurizer.batch_compute):
+        # this calculator can be batched which will be faster
+        features = self.featurizer.batch_compute(mols, n_jobs=self.n_jobs, **parallel_kwargs)
+    else:
+        mols = dm.parallelized(_to_mol, mols, n_jobs=self.n_jobs, **parallel_kwargs)
+        if self.n_jobs not in [0, 1]:
+            # use a proxy model to run in parallel
+            cpy = self.copy()
+            features = dm.parallelized(
+                cpy._transform,
+                mols,
+                n_jobs=self.n_jobs,
+                **cpy.parallel_kwargs,
+            )
+        else:
+            features = [self._transform(mol) for mol in mols]
+    if not ignore_errors:
+        for ind, feat in enumerate(features):
+            if feat is None:
+                raise ValueError(
+                    f"Cannot transform molecule at index {ind}. Please check logs (set verbose to True) to see errors!"
+                )
+
+    # sklearn feature validation for sklearn pipeline
+    return datatype.as_numpy_array_if_possible(features, self.dtype)
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ PrecomputedMolTransformer + + +

+ + +
+

+ Bases: MoleculeTransformer

+ + +

Convenience class for storing precomputed features.

+ +
+ Source code in molfeat/trans/base.py +
703
+704
+705
+706
+707
+708
+709
+710
+711
+712
+713
+714
+715
+716
+717
+718
+719
+720
+721
+722
+723
+724
+725
+726
+727
+728
+729
+730
+731
+732
+733
+734
+735
+736
+737
+738
+739
+740
+741
+742
+743
+744
+745
+746
+747
+748
+749
+750
+751
+752
+753
+754
+755
+756
+757
+758
+759
+760
+761
+762
+763
+764
+765
+766
+767
+768
+769
+770
+771
+772
+773
+774
+775
+776
+777
+778
+779
+780
+781
+782
+783
+784
+785
+786
+787
+788
+789
+790
+791
+792
+793
+794
+795
+796
+797
+798
+799
+800
+801
+802
+803
+804
+805
+806
+807
+808
+809
+810
+811
+812
+813
+814
+815
+816
+817
+818
+819
+820
+821
+822
+823
+824
+825
+826
+827
+828
+829
+830
+831
+832
+833
+834
+835
+836
+837
+838
+839
+840
+841
+842
+843
+844
+845
+846
+847
+848
+849
+850
+851
+852
+853
+854
+855
+856
+857
+858
+859
+860
+861
+862
+863
+864
+865
+866
+867
+868
+869
+870
+871
+872
+873
+874
+875
+876
+877
+878
+879
+880
+881
+882
+883
+884
+885
+886
+887
+888
class PrecomputedMolTransformer(MoleculeTransformer):
+    """Convenience class for storing precomputed features."""
+
+    def __init__(
+        self,
+        cache: Optional[Union[_Cache, Mapping[Any, Any], CacheList]] = None,
+        cache_dict: Optional[Dict[str, Union[_Cache, Mapping[Any, Any], CacheList]]] = None,
+        cache_key: Optional[str] = None,
+        *args,
+        featurizer: Optional[Union[MoleculeTransformer, str]] = None,
+        state_path: Optional[str] = None,
+        **kwargs,
+    ):
+        """
+        Transformer that supports precomputation of features. You can either pass an empty cache or a pre-populated cache
+
+        Args:
+            cache: a datastructure of type mapping that maps each molecule to the precomputed features
+            cache_dict: A dictionary of cache objects. This is a convenient structure when use multiple
+                datacache for model selection.
+            cache_key: The key of cache object to use.
+            featurizer: optional featurizer used to compute the features of values not in the cache.
+                Either the featurizer object or a string.
+            state_path: optional state file path used to initiate the transformer object at the initialization
+        """
+        if (state_path is not None) and (
+            (cache is not None) or (cache_dict is not None and cache_key is not None)
+        ):
+            raise ValueError(
+                "`PrecomputedMolTransformer` can only be initiated by either `state_path` or"
+                " the rest of parameters for cache and featurizer. But both are given."
+            )
+
+        super().__init__(*args, featurizer="none", **kwargs)
+
+        if state_path is not None:
+            self.__dict__ = self.from_state_file(state_path=state_path).__dict__.copy()
+        else:
+            if cache_dict is not None and cache_key is not None:
+                self.cache_key = cache_key
+                self.cache = cache_dict[self.cache_key]
+            elif cache is not None:
+                self.cache = cache
+            else:
+                raise AttributeError("The cache is not specified.")
+
+            if isinstance(featurizer, str):
+                self.base_featurizer = MoleculeTransformer(featurizer, *args, **kwargs)
+            else:
+                self.base_featurizer = featurizer
+
+        # Set the length of the featurizer
+        if len(self.cache) > 0:
+            self.length = len(list(self.cache.values())[0])
+        elif self.base_featurizer is not None:
+            self.length = len(self.base_featurizer)
+        else:
+            raise AttributeError(
+                "The cache is empty and the base featurizer is not specified. It's impossible"
+                " to determine the length of the featurizer."
+            )
+
+    def _transform(self, mol: dm.Mol):
+        r"""
+        Return precomputed feature for a single molecule
+
+        Args:
+            mol (dm.Mol): molecule to transform into features
+
+        Returns
+            feat: featurized input molecule
+
+        """
+        feat = self.cache.get(mol)
+        # if feat is None and we have an existing featurizer, we can update the cache
+        if feat is None and self.base_featurizer is not None:
+            feat = self.base_featurizer._transform(mol)
+            self.cache[mol] = feat
+
+        try:
+            feat = datatype.to_numpy(feat)
+            if self.cols_to_keep is not None:
+                feat = feat[self.cols_to_keep]
+        except Exception as e:
+            if self.verbose:
+                logger.error(e)
+        return feat
+
+    def update(self, feat_dict: Mapping[Any, Any]):
+        r"""
+        Fill the cache with new set of features for the molecules in mols.
+
+        Args:
+            feat_dict: A dictionary of molecules to features.
+        """
+        self.cache.update(feat_dict)
+
+    def __getstate__(self):
+        """Get the state for pickling"""
+        state = {k: copy.deepcopy(v) for k, v in self.__dict__.items() if k not in ["cache"]}
+        if isinstance(self.cache, FileCache):
+            state["file_cache_args"] = dict(
+                cache_file=self.cache.cache_file,
+                name=self.cache.name,
+                mol_hasher=self.cache.mol_hasher,
+                n_jobs=self.cache.n_jobs,
+                verbose=self.cache.verbose,
+                file_type=self.cache.file_type,
+                parquet_kwargs=self.cache.parquet_kwargs,
+            )
+        else:
+            # EN: we do not copy the cache
+            state["cache"] = self.cache
+        return state
+
+    def __setstate__(self, state):
+        if "file_cache_args" in state:
+            cache = FileCache(**state.pop("file_cache_args"))
+            state["cache"] = cache
+        return super().__setstate__(state)
+
+    def to_state_dict(self, save_to_file: bool = True) -> dict:
+        """Serialize a PrecomputedMolTransformer object to a state dict.
+
+        Notes:
+            - The base_featurizer must be set or a ValueError will be raised.
+            - The cache must be a FileCache object or a ValueError will be raised.
+
+        Args:
+            save_to_file: whether to save the cache to file.
+        """
+
+        if self.base_featurizer is None:
+            raise ValueError(
+                "You can't serialize a PrecomputedMolTransformer that does not contain a"
+                " featurizer."
+            )
+
+        if not isinstance(self.cache, FileCache):
+            raise ValueError("The cache must be a FileCache object.")
+
+        state = {}
+        state["name"] = "PrecomputedMolTransformer"
+        state["base_featurizer"] = self.base_featurizer.to_state_dict()
+        state["cache"] = self.cache.to_state_dict(save_to_file=save_to_file)
+        state["_molfeat_version"] = MOLFEAT_VERSION
+
+        return state
+
+    @staticmethod
+    def from_state_dict(
+        state: dict,
+        override_args: Optional[dict] = None,
+    ) -> "PrecomputedMolTransformer":
+        # Don't alter the original state dict
+        state = copy.deepcopy(state)
+
+        args = {}
+
+        # Load the FileCache object
+        args["cache"] = FileCache.from_state_dict(state["cache"])
+
+        # Load the base featurizer
+        args["featurizer"] = MoleculeTransformer.from_state_dict(state["base_featurizer"])
+
+        if override_args is not None:
+            args.update(override_args)
+
+        # Doesn't allow state_path in the initiation args
+        args.pop("state_path", None)
+        return PrecomputedMolTransformer(**args)
+
+    def from_state_file(
+        self,
+        state_path: str,
+        override_args: Optional[dict] = None,
+    ) -> "PrecomputedMolTransformer":
+        if state_path.endswith("yaml") or state_path.endswith("yml"):
+            return self.from_state_yaml_file(filepath=state_path, override_args=override_args)
+        elif state_path.endswith("json"):
+            return self.from_state_json_file(filepath=state_path, override_args=override_args)
+        else:
+            raise ValueError(
+                "Only files with 'yaml' or 'json' format are allowed. "
+                "The filename must be ending with `yaml`, 'yml' or 'json'."
+            )
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __getstate__() + +

+ + +
+ +

Get the state for pickling

+ +
+ Source code in molfeat/trans/base.py +
800
+801
+802
+803
+804
+805
+806
+807
+808
+809
+810
+811
+812
+813
+814
+815
+816
def __getstate__(self):
+    """Get the state for pickling"""
+    state = {k: copy.deepcopy(v) for k, v in self.__dict__.items() if k not in ["cache"]}
+    if isinstance(self.cache, FileCache):
+        state["file_cache_args"] = dict(
+            cache_file=self.cache.cache_file,
+            name=self.cache.name,
+            mol_hasher=self.cache.mol_hasher,
+            n_jobs=self.cache.n_jobs,
+            verbose=self.cache.verbose,
+            file_type=self.cache.file_type,
+            parquet_kwargs=self.cache.parquet_kwargs,
+        )
+    else:
+        # EN: we do not copy the cache
+        state["cache"] = self.cache
+    return state
+
+
+
+ +
+ + +
+ + + + +

+ __init__(cache=None, cache_dict=None, cache_key=None, *args, featurizer=None, state_path=None, **kwargs) + +

+ + +
+ +

Transformer that supports precomputation of features. You can either pass an empty cache or a pre-populated cache

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
cache + Optional[Union[_Cache, Mapping[Any, Any], CacheList]] + +
+

a datastructure of type mapping that maps each molecule to the precomputed features

+
+
+ None +
cache_dict + Optional[Dict[str, Union[_Cache, Mapping[Any, Any], CacheList]]] + +
+

A dictionary of cache objects. This is a convenient structure when using multiple +datacaches for model selection.

+
+
+ None +
cache_key + Optional[str] + +
+

The key of cache object to use.

+
+
+ None +
featurizer + Optional[Union[MoleculeTransformer, str]] + +
+

optional featurizer used to compute the features of values not in the cache. +Either the featurizer object or a string.

+
+
+ None +
state_path + Optional[str] + +
+

optional path to a state file used to initialize the transformer object

+
+
+ None +
+ +
+ Source code in molfeat/trans/base.py +
706
+707
+708
+709
+710
+711
+712
+713
+714
+715
+716
+717
+718
+719
+720
+721
+722
+723
+724
+725
+726
+727
+728
+729
+730
+731
+732
+733
+734
+735
+736
+737
+738
+739
+740
+741
+742
+743
+744
+745
+746
+747
+748
+749
+750
+751
+752
+753
+754
+755
+756
+757
+758
+759
+760
+761
+762
+763
def __init__(
+    self,
+    cache: Optional[Union[_Cache, Mapping[Any, Any], CacheList]] = None,
+    cache_dict: Optional[Dict[str, Union[_Cache, Mapping[Any, Any], CacheList]]] = None,
+    cache_key: Optional[str] = None,
+    *args,
+    featurizer: Optional[Union[MoleculeTransformer, str]] = None,
+    state_path: Optional[str] = None,
+    **kwargs,
+):
+    """
+    Transformer that supports precomputation of features. You can either pass an empty cache or a pre-populated cache
+
+    Args:
+        cache: a datastructure of type mapping that maps each molecule to the precomputed features
+        cache_dict: A dictionary of cache objects. This is a convenient structure when use multiple
+            datacache for model selection.
+        cache_key: The key of cache object to use.
+        featurizer: optional featurizer used to compute the features of values not in the cache.
+            Either the featurizer object or a string.
+        state_path: optional state file path used to initiate the transformer object at the initialization
+    """
+    if (state_path is not None) and (
+        (cache is not None) or (cache_dict is not None and cache_key is not None)
+    ):
+        raise ValueError(
+            "`PrecomputedMolTransformer` can only be initiated by either `state_path` or"
+            " the rest of parameters for cache and featurizer. But both are given."
+        )
+
+    super().__init__(*args, featurizer="none", **kwargs)
+
+    if state_path is not None:
+        self.__dict__ = self.from_state_file(state_path=state_path).__dict__.copy()
+    else:
+        if cache_dict is not None and cache_key is not None:
+            self.cache_key = cache_key
+            self.cache = cache_dict[self.cache_key]
+        elif cache is not None:
+            self.cache = cache
+        else:
+            raise AttributeError("The cache is not specified.")
+
+        if isinstance(featurizer, str):
+            self.base_featurizer = MoleculeTransformer(featurizer, *args, **kwargs)
+        else:
+            self.base_featurizer = featurizer
+
+    # Set the length of the featurizer
+    if len(self.cache) > 0:
+        self.length = len(list(self.cache.values())[0])
+    elif self.base_featurizer is not None:
+        self.length = len(self.base_featurizer)
+    else:
+        raise AttributeError(
+            "The cache is empty and the base featurizer is not specified. It's impossible"
+            " to determine the length of the featurizer."
+        )
+
+
+
+ +
+ + +
+ + + + +

+ to_state_dict(save_to_file=True) + +

+ + +
+ +

Serialize a PrecomputedMolTransformer object to a state dict.

+ +
+ Notes +
    +
  • The base_featurizer must be set or a ValueError will be raised.
  • +
  • The cache must be a FileCache object or a ValueError will be raised.
  • +
+
+ + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
save_to_file + bool + +
+

whether to save the cache to file.

+
+
+ True +
+ +
+ Source code in molfeat/trans/base.py +
824
+825
+826
+827
+828
+829
+830
+831
+832
+833
+834
+835
+836
+837
+838
+839
+840
+841
+842
+843
+844
+845
+846
+847
+848
+849
+850
def to_state_dict(self, save_to_file: bool = True) -> dict:
+    """Serialize a PrecomputedMolTransformer object to a state dict.
+
+    Notes:
+        - The base_featurizer must be set or a ValueError will be raised.
+        - The cache must be a FileCache object or a ValueError will be raised.
+
+    Args:
+        save_to_file: whether to save the cache to file.
+    """
+
+    if self.base_featurizer is None:
+        raise ValueError(
+            "You can't serialize a PrecomputedMolTransformer that does not contain a"
+            " featurizer."
+        )
+
+    if not isinstance(self.cache, FileCache):
+        raise ValueError("The cache must be a FileCache object.")
+
+    state = {}
+    state["name"] = "PrecomputedMolTransformer"
+    state["base_featurizer"] = self.base_featurizer.to_state_dict()
+    state["cache"] = self.cache.to_state_dict(save_to_file=save_to_file)
+    state["_molfeat_version"] = MOLFEAT_VERSION
+
+    return state
+
+
+
+ +
+ + +
+ + + + +

+ update(feat_dict) + +

+ + +
+ +

Fill the cache with a new set of features for the molecules in feat_dict.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
feat_dict + Mapping[Any, Any] + +
+

A dictionary of molecules to features.

+
+
+ required +
+ +
+ Source code in molfeat/trans/base.py +
791
+792
+793
+794
+795
+796
+797
+798
def update(self, feat_dict: Mapping[Any, Any]):
+    r"""
+    Fill the cache with new set of features for the molecules in mols.
+
+    Args:
+        feat_dict: A dictionary of molecules to features.
+    """
+    self.cache.update(feat_dict)
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.trans.concat.html b/0.9.7/api/molfeat.trans.concat.html new file mode 100644 index 0000000..c26a0f7 --- /dev/null +++ b/0.9.7/api/molfeat.trans.concat.html @@ -0,0 +1,3517 @@ + + + + + + + + + + + + + + + + + + + + + + + + + molfeat.trans.concat - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

molfeat.trans.concat

+ +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ FeatConcat + + +

+ + +
+

+ Bases: list, BaseEstimator

+ + +

Concatenation container for FPVecTransformer. This class allows +merging multiple fingerprints into a single one. +It gives the ability to call the following methods + - fit + - transform + - fit_transform +on a list of transformers and concatenate the results.

+
+

Note

+

The featurization length of this featurizer is accessible via the length property. +len() will return the number of base featurizers.

+
+ +
+ Source code in molfeat/trans/concat.py +
 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
class FeatConcat(list, BaseEstimator):
+    r"""
+    Concatenation container for `FPVecTransformer`. This class allows
+    merging multiple fingerprints into a single one.
+    It gives the ability to call the following methods
+        - `fit`
+        - `transform`
+        - `fit_transform`
+    on a list of transformers and concatenate the results.
+
+    !!! note
+        The featurization length of this featurizer is accessible via the `length` property.
+        `len()` will return the number of base featurizer.
+    """
+
+    _STR_DELIMITER = "||"
+
+    def __init__(
+        self,
+        iterable: Optional[Union[Iterable, str]] = None,
+        dtype: Optional[Callable] = None,
+        params: Optional[Dict[str, Any]] = None,
+        collate_fn: Optional[Callable] = None,
+    ):
+        """Featurizer concatenator
+
+        Args:
+            iterable: List of featurizer to concatenate.
+            dtype: Datatype of the computed fingerprint
+            params: Optional dictionary of parameters for the featurizers when there is a need for initializing them
+            collate_fn: optional function to provide for custom collating.
+                By default the collate function will be None, which will use the torch default
+        """
+        super().__init__()
+        self.params = params or {}
+        if isinstance(iterable, str):
+            iterable = [x.strip() for x in iterable.split(self._STR_DELIMITER)]
+        if iterable is not None:
+            for item in iterable:
+                if isinstance(item, str):
+                    item = FPVecTransformer(kind=item, **self.params.get(item, {}))
+                self.append(item)
+        self.dtype = dtype
+        self._length = None
+        self.collate_fn = collate_fn
+
+    def append(self, item):
+        r"""Override the ``append`` to accept only ``FPVecTransformer``"""
+        self._check_supported(item)
+        super().append(item)
+
+    def insert(self, index, item):
+        r"""Override the ``insert`` to accept only ``BaseFeaturizer``"""
+        self._check_suported(item)
+        super().insert(index, item)
+
+    def __add__(self, item):
+        """Override the `__add__` method"""
+        self._check_supported(item)
+        super().__add__(item)
+
+    def __setitem__(self, index, value):
+        """Override the `__setitem__`  method"""
+        self._check_supported(value)
+        super().__setitem__(index, value)
+
+    @property
+    def length(self):
+        """
+        Length property for Feat concatenation.  This is the sum of the length of each transformer.
+        Note that __len__ returns the number of base featurizer here instead.
+        """
+        if self._length is None:
+            full_length = 0
+            for feat in self:
+                if isinstance(feat, FeatConcat):
+                    full_length += feat.length
+                else:
+                    full_length += len(feat)
+            self._length = full_length
+        return self._length
+
+    def _check_supported(self, item):
+        r"""Check if the item is the right type"""
+        if not isinstance(item, FPVecTransformer):
+            raise ValueError("FPVecTransformer allowed only, provided {}".format(type(item)))
+
+    def get_collate_fn(self, *args, **kwargs):
+        """
+        Get collate function of this featurizer. The FeatConcat featurizer use the default
+        collate function which does not do anything.
+
+        Returns:
+            fn: Collate function for pytorch or None
+        """
+        return getattr(self, "collate_fn", None)
+
+    def iter_index(self, indexes: Union[int, Iterator[int]]):
+        r"""
+        Allow the `FeatConcat` to be indexed using a list, or any other iterable.
+
+        Args:
+            indexes: The indexes to index the ``FeatConcat``.
+
+        Returns
+            indexed_fpconcat: A new FeatConcat object with the indexed element
+        """
+        if not isinstance(indexes, (list, tuple)):
+            try:
+                indexes = list(indexes)
+            except TypeError:
+                indexes = [indexes]
+        return FeatConcat([self[ii] for ii in indexes])
+
+    @property
+    def columns(self):
+        """Get the list of columns for the concatenated molecules
+
+        Returns:
+            columns (list): Name of the columns of the descriptor
+        """
+        tmp_mol = dm.to_mol("CC(C)O")
+        columns = []
+        for fp in self:
+            fp_columns = getattr(fp, "columns", None)
+            fp_name = str(fp)
+            if fp_columns is None:
+                fp_out, _ = fp([tmp_mol])
+                fp_out = np.asarray(fp_out)
+                fp_columns = [f"{fp_name}:{ind}" for ind in range(fp_out.shape[-1])]
+            columns.extend(fp_columns)
+        return columns
+
+    def transform(self, mols: List[Union[dm.Mol, str]], **kwargs):
+        r"""
+        Calls the ``FPVecTransformer.transform`` for each transformer in
+        the current list, and concatenates the resulting fingerprints.
+
+        Args:
+            mols: List of SMILES or molecules
+            kwargs: named parameters for transform (see below)
+
+        Returns:
+            fps: Computed fingerprints of size NxD, where D is the
+                sum of the length of each transformer and N is the number of input
+                molecules that have been successfully featurized.
+        """
+
+        fps = []
+        for _, fp_trans in enumerate(self):
+            out = fp_trans.transform(mols, enforce_dtype=False, **kwargs)
+            out = datatype.cast(out, dtype="pandas")
+            fps.append(out)
+        fps = pd.concat(fps, axis=1)
+        fps.columns = self.columns
+        return fps.values
+
+    def __call__(
+        self,
+        mols: List[Union[dm.Mol, str]],
+        enforce_dtype: bool = False,
+        ignore_errors: bool = False,
+        **kwargs,
+    ):
+        r"""
+        Calls each of the internal transformer and concatenate results only on valid indices.
+
+        Args:
+            mols:  Mol or SMILES of the molecules to be transformed
+            enforce_dtype: whether to enforce the instance dtype in the generated fingerprint
+            ignore_errors: Whether to ignore errors during featurization or raise an error.
+            kwargs: Named parameters for the transform method
+
+        Returns:
+
+            fp: array
+                computed fingerprints of size NxD, where D is the
+                sum of the length of each transformer and N is the number of input
+                molecules that have been successfully featurized.
+            idx: array
+                Indices of successful featurization given the original molecule input.
+        """
+
+        fps = []
+        valid_idx = np.zeros(len(mols))
+        for _, transf in enumerate(self):
+            fp = transf.transform(mols, ignore_errors=ignore_errors, **kwargs)
+            fp, idx = transf._filter_none(fp)
+            fps.append(fp)
+            valid_idx[idx] += 1  # increase counter of valids
+        valid_idx = np.nonzero(valid_idx == len(self))[0]
+        fps = np.concatenate(fps, axis=1)
+        if self.dtype is not None and enforce_dtype:
+            fps = datatype.cast(fps, dtype=self.dtype, columns=self.columns)
+        if not ignore_errors:
+            return fps
+        return fps, list(valid_idx)
+
+    def fit_transform(
+        self,
+        mols: List[Union[str, dm.Mol]],
+        y: Optional[Iterable] = None,
+        fit_kwargs: Dict = None,
+        trans_kwargs: Dict = None,
+    ):
+        r"""
+        Calls the ``self.fit`` followed by the ``fit.transform`` for each transfomer in
+        the current list, and concatenates the resulting fingerprints.
+
+        Args:
+            mols: List of SMILES or molecules
+            y: target for the fitting. Usually ignored for FPVecTransformer
+            fit_kwargs:  named parameters for fit
+            fit_kwargs:named parameters for transform
+
+        Returns:
+
+            fp: computed fingerprints of size NxD, where D is the
+                sum of the length of each transformer and N is the number of input
+                molecules that have been successfully featurized.
+        """
+        fit_kwargs = {} if fit_kwargs is None else fit_kwargs
+        trans_kwargs = {} if trans_kwargs is None else trans_kwargs
+        self.fit(mols, y=y, **fit_kwargs)
+        return self.transform(mols, **trans_kwargs)
+
+    def fit(self, X: List[Union[dm.Mol, str]], y=None, **kwargs):
+        r"""
+        Calls the ``FPVecTransformer.fit`` for each transformer in the current list.
+
+        Args:
+            X: input list of molecules
+            y (list, optional): Optional list of molecular properties. Defaults to None.
+
+        Returns:
+            self: FeatConcat instance after fitting
+        """
+
+        for _, fp_trans in enumerate(self):
+            fp_trans.fit(X, y=y, **kwargs)
+        return self
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ columns + + + property + + +

+ + +
+ +

Get the list of columns for the concatenated molecules

+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
columns + list + +
+

Name of the columns of the descriptor

+
+
+
+ +
+ +
+ + + + +

+ length + + + property + + +

+ + +
+ +

Length property for the feature concatenation. This is the sum of the lengths of all transformers. +Note that __len__ returns the number of base featurizers here instead.

+
+ +
+ + + + +
+ + + + +

+ __add__(item) + +

+ + +
+ +

Override the __add__ method

+ +
+ Source code in molfeat/trans/concat.py +
75
+76
+77
+78
def __add__(self, item):
+    """Override the `__add__` method"""
+    self._check_supported(item)
+    super().__add__(item)
+
+
+
+ +
+ + +
+ + + + +

+ __call__(mols, enforce_dtype=False, ignore_errors=False, **kwargs) + +

+ + +
+ +

Calls each of the internal transformer and concatenate results only on valid indices.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + List[Union[Mol, str]] + +
+

Mol or SMILES of the molecules to be transformed

+
+
+ required +
enforce_dtype + bool + +
+

whether to enforce the instance dtype in the generated fingerprint

+
+
+ False +
ignore_errors + bool + +
+

Whether to ignore errors during featurization or raise an error.

+
+
+ False +
kwargs + +
+

Named parameters for the transform method

+
+
+ {} +
+

Returns:

+
fp: array
+    computed fingerprints of size NxD, where D is the
+    sum of the length of each transformer and N is the number of input
+    molecules that have been successfully featurized.
+idx: array
+    Indices of successful featurization given the original molecule input.
+
+ +
+ Source code in molfeat/trans/concat.py +
176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
def __call__(
+    self,
+    mols: List[Union[dm.Mol, str]],
+    enforce_dtype: bool = False,
+    ignore_errors: bool = False,
+    **kwargs,
+):
+    r"""
+    Calls each of the internal transformer and concatenate results only on valid indices.
+
+    Args:
+        mols:  Mol or SMILES of the molecules to be transformed
+        enforce_dtype: whether to enforce the instance dtype in the generated fingerprint
+        ignore_errors: Whether to ignore errors during featurization or raise an error.
+        kwargs: Named parameters for the transform method
+
+    Returns:
+
+        fp: array
+            computed fingerprints of size NxD, where D is the
+            sum of the length of each transformer and N is the number of input
+            molecules that have been successfully featurized.
+        idx: array
+            Indices of successful featurization given the original molecule input.
+    """
+
+    fps = []
+    valid_idx = np.zeros(len(mols))
+    for _, transf in enumerate(self):
+        fp = transf.transform(mols, ignore_errors=ignore_errors, **kwargs)
+        fp, idx = transf._filter_none(fp)
+        fps.append(fp)
+        valid_idx[idx] += 1  # increase counter of valids
+    valid_idx = np.nonzero(valid_idx == len(self))[0]
+    fps = np.concatenate(fps, axis=1)
+    if self.dtype is not None and enforce_dtype:
+        fps = datatype.cast(fps, dtype=self.dtype, columns=self.columns)
+    if not ignore_errors:
+        return fps
+    return fps, list(valid_idx)
+
+
+
+ +
+ + +
+ + + + +

+ __init__(iterable=None, dtype=None, params=None, collate_fn=None) + +

+ + +
+ +

Featurizer concatenator

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
iterable + Optional[Union[Iterable, str]] + +
+

List of featurizer to concatenate.

+
+
+ None +
dtype + Optional[Callable] + +
+

Datatype of the computed fingerprint

+
+
+ None +
params + Optional[Dict[str, Any]] + +
+

Optional dictionary of parameters for the featurizers when there is a need for initializing them

+
+
+ None +
collate_fn + Optional[Callable] + +
+

optional function to provide for custom collating. +By default the collate function will be None, which will use the torch default

+
+
+ None +
+ +
+ Source code in molfeat/trans/concat.py +
36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
def __init__(
+    self,
+    iterable: Optional[Union[Iterable, str]] = None,
+    dtype: Optional[Callable] = None,
+    params: Optional[Dict[str, Any]] = None,
+    collate_fn: Optional[Callable] = None,
+):
+    """Featurizer concatenator
+
+    Args:
+        iterable: List of featurizer to concatenate.
+        dtype: Datatype of the computed fingerprint
+        params: Optional dictionary of parameters for the featurizers when there is a need for initializing them
+        collate_fn: optional function to provide for custom collating.
+            By default the collate function will be None, which will use the torch default
+    """
+    super().__init__()
+    self.params = params or {}
+    if isinstance(iterable, str):
+        iterable = [x.strip() for x in iterable.split(self._STR_DELIMITER)]
+    if iterable is not None:
+        for item in iterable:
+            if isinstance(item, str):
+                item = FPVecTransformer(kind=item, **self.params.get(item, {}))
+            self.append(item)
+    self.dtype = dtype
+    self._length = None
+    self.collate_fn = collate_fn
+
+
+
+ +
+ + +
+ + + + +

+ __setitem__(index, value) + +

+ + +
+ +

Override the __setitem__ method

+ +
+ Source code in molfeat/trans/concat.py +
80
+81
+82
+83
def __setitem__(self, index, value):
+    """Override the `__setitem__`  method"""
+    self._check_supported(value)
+    super().__setitem__(index, value)
+
+
+
+ +
+ + +
+ + + + +

+ append(item) + +

+ + +
+ +

Override the append to accept only FPVecTransformer

+ +
+ Source code in molfeat/trans/concat.py +
65
+66
+67
+68
def append(self, item):
+    r"""Override the ``append`` to accept only ``FPVecTransformer``"""
+    self._check_supported(item)
+    super().append(item)
+
+
+
+ +
+ + +
+ + + + +

+ fit(X, y=None, **kwargs) + +

+ + +
+ +

Calls the FPVecTransformer.fit for each transformer in the current list.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
X + List[Union[Mol, str]] + +
+

input list of molecules

+
+
+ required +
y + list + +
+

Optional list of molecular properties. Defaults to None.

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
self + +
+

FeatConcat instance after fitting

+
+
+ +
+ Source code in molfeat/trans/concat.py +
245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
def fit(self, X: List[Union[dm.Mol, str]], y=None, **kwargs):
+    r"""
+    Calls the ``FPVecTransformer.fit`` for each transformer in the current list.
+
+    Args:
+        X: input list of molecules
+        y (list, optional): Optional list of molecular properties. Defaults to None.
+
+    Returns:
+        self: FeatConcat instance after fitting
+    """
+
+    for _, fp_trans in enumerate(self):
+        fp_trans.fit(X, y=y, **kwargs)
+    return self
+
+
+
+ +
+ + +
+ + + + +

+ fit_transform(mols, y=None, fit_kwargs=None, trans_kwargs=None) + +

+ + +
+ +

Calls self.fit followed by self.transform for each transformer in +the current list, and concatenates the resulting fingerprints.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + List[Union[str, Mol]] + +
+

List of SMILES or molecules

+
+
+ required +
y + Optional[Iterable] + +
+

target for the fitting. Usually ignored for FPVecTransformer

+
+
+ None +
fit_kwargs + Dict + +
+

named parameters for fit

+
+
+ None +
trans_kwargs + Dict + +
+

named parameters for transform

+
+
+ None +
+

Returns:

+
fp: computed fingerprints of size NxD, where D is the
+    sum of the length of each transformer and N is the number of input
+    molecules that have been successfully featurized.
+
+ +
+ Source code in molfeat/trans/concat.py +
217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
def fit_transform(
+    self,
+    mols: List[Union[str, dm.Mol]],
+    y: Optional[Iterable] = None,
+    fit_kwargs: Dict = None,
+    trans_kwargs: Dict = None,
+):
+    r"""
+    Calls the ``self.fit`` followed by the ``fit.transform`` for each transfomer in
+    the current list, and concatenates the resulting fingerprints.
+
+    Args:
+        mols: List of SMILES or molecules
+        y: target for the fitting. Usually ignored for FPVecTransformer
+        fit_kwargs:  named parameters for fit
+        fit_kwargs:named parameters for transform
+
+    Returns:
+
+        fp: computed fingerprints of size NxD, where D is the
+            sum of the length of each transformer and N is the number of input
+            molecules that have been successfully featurized.
+    """
+    fit_kwargs = {} if fit_kwargs is None else fit_kwargs
+    trans_kwargs = {} if trans_kwargs is None else trans_kwargs
+    self.fit(mols, y=y, **fit_kwargs)
+    return self.transform(mols, **trans_kwargs)
+
+
+
+ +
+ + +
+ + + + +

+ get_collate_fn(*args, **kwargs) + +

+ + +
+ +

Get the collate function of this featurizer. The FeatConcat featurizer uses the default +collate function which does not do anything.

+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
fn + +
+

Collate function for pytorch or None

+
+
+ +
+ Source code in molfeat/trans/concat.py +
106
+107
+108
+109
+110
+111
+112
+113
+114
def get_collate_fn(self, *args, **kwargs):
+    """
+    Get collate function of this featurizer. The FeatConcat featurizer use the default
+    collate function which does not do anything.
+
+    Returns:
+        fn: Collate function for pytorch or None
+    """
+    return getattr(self, "collate_fn", None)
+
+
+
+ +
+ + +
+ + + + +

+ insert(index, item) + +

+ + +
+ +

Override insert to accept only FPVecTransformer

+ +
+ Source code in molfeat/trans/concat.py +
70
+71
+72
+73
def insert(self, index, item):
+    r"""Override the ``insert`` to accept only ``BaseFeaturizer``"""
+    self._check_suported(item)
+    super().insert(index, item)
+
+
+
+ +
+ + +
+ + + + +

+ iter_index(indexes) + +

+ + +
+ +

Allow the FeatConcat to be indexed using a list, or any other iterable.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
indexes + Union[int, Iterator[int]] + +
+

The indexes to index the FeatConcat.

+
+
+ required +
+

Returns + indexed_fpconcat: A new FeatConcat object with the indexed element

+ +
+ Source code in molfeat/trans/concat.py +
116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
def iter_index(self, indexes: Union[int, Iterator[int]]):
+    r"""
+    Allow the `FeatConcat` to be indexed using a list, or any other iterable.
+
+    Args:
+        indexes: The indexes to index the ``FeatConcat``.
+
+    Returns
+        indexed_fpconcat: A new FeatConcat object with the indexed element
+    """
+    if not isinstance(indexes, (list, tuple)):
+        try:
+            indexes = list(indexes)
+        except TypeError:
+            indexes = [indexes]
+    return FeatConcat([self[ii] for ii in indexes])
+
+
+
+ +
+ + +
+ + + + +

+ transform(mols, **kwargs) + +

+ + +
+ +

Calls the FPVecTransformer.transform for each transformer in +the current list, and concatenates the resulting fingerprints.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + List[Union[Mol, str]] + +
+

List of SMILES or molecules

+
+
+ required +
kwargs + +
+

named parameters for transform (see below)

+
+
+ {} +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
fps + +
+

Computed fingerprints of size NxD, where D is the +sum of the length of each transformer and N is the number of input +molecules that have been successfully featurized.

+
+
+ +
+ Source code in molfeat/trans/concat.py +
152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
def transform(self, mols: List[Union[dm.Mol, str]], **kwargs):
+    r"""
+    Calls the ``FPVecTransformer.transform`` for each transformer in
+    the current list, and concatenates the resulting fingerprints.
+
+    Args:
+        mols: List of SMILES or molecules
+        kwargs: named parameters for transform (see below)
+
+    Returns:
+        fps: Computed fingerprints of size NxD, where D is the
+            sum of the length of each transformer and N is the number of input
+            molecules that have been successfully featurized.
+    """
+
+    fps = []
+    for _, fp_trans in enumerate(self):
+        out = fp_trans.transform(mols, enforce_dtype=False, **kwargs)
+        out = datatype.cast(out, dtype="pandas")
+        fps.append(out)
+    fps = pd.concat(fps, axis=1)
+    fps.columns = self.columns
+    return fps.values
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.trans.fp.html b/0.9.7/api/molfeat.trans.fp.html new file mode 100644 index 0000000..4dcf97f --- /dev/null +++ b/0.9.7/api/molfeat.trans.fp.html @@ -0,0 +1,3004 @@ + + + + + + + + + + + + + + + + + + + + + + + + + molfeat.trans.fp - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

molfeat.trans.fp

+ +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ FPVecFilteredTransformer + + +

+ + +
+

+ Bases: FPVecTransformer

+ + +

Fingerprint molecule transformer with column filters applied to the featurized vector when fit is called

+ +
+ Source code in molfeat/trans/fp.py +
171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
class FPVecFilteredTransformer(FPVecTransformer):
+    r"""
+    Fingerprint molecule transformer with columns filters applying to the featurized vector when `fit` is called
+    """
+
+    def __init__(
+        self,
+        kind: str = "ecfp:4",
+        length: int = 2000,
+        occ_threshold: float = 0,
+        del_invariant: bool = False,
+        n_jobs: int = 1,
+        verbose: bool = False,
+        dtype: Callable = np.float32,
+        **params,
+    ):
+        """Molecular to vector featurization with filtering applied
+
+        Args:
+            kind (str, optional): Name of the fingerprints (one supported fingerprints: see self.AVAILABLE_FPS). Defaults to "ecfp4".
+            length (int, optional): Length of the fingerprint. Defaults to 2000.
+            occ_threshold (float, optional): Minimum proportion a columns need to be non null to be kept.
+            del_invariant (bool, optional): Whether to delete columns that are invariant.
+            n_jobs (int, optional): Number of jobs. Defaults to 1.
+            verbose (bool, optional): Verbosity level. Defaults to False.
+            dtype (Callable, optional): Data type. Defaults to np.float32.
+            params (dict, optional): Any additional parameters to the fingerprint function
+        """
+
+        super().__init__(
+            kind=kind,
+            length=length,
+            n_jobs=n_jobs,
+            verbose=verbose,
+            dtype=dtype,
+            **params,
+        )
+        self.occ_threshold = occ_threshold
+        self.del_invariant = del_invariant
+        self._input_params.update(occ_threshold=occ_threshold, del_invariant=del_invariant)
+
+    def _update_params(self):
+        params = copy.deepcopy(self._input_params)
+        params.pop("featurizer", None)
+        params.pop("length", None)
+        params.pop("kind", None)
+        params.pop("verbose", None)
+        params.pop("dtype", None)
+        params.pop("n_jobs", None)
+        params.pop("occ_threshold", None)
+        params.pop("del_invariant", None)
+        self.featurizer = self._prepare_featurizer(self.kind, self.length, **params)
+
+    def __repr__(self):
+        return "{} (kind={}, length={}, occ_threshold={}, del_invariant={}, dtype={})".format(
+            self.__class__.__name__,
+            _parse_to_evaluable_str(self.kind),
+            _parse_to_evaluable_str(self.length),
+            _parse_to_evaluable_str(self.occ_threshold),
+            _parse_to_evaluable_str(self.del_invariant),
+            _parse_to_evaluable_str(self.dtype),
+        )
+
+    def fit(self, X: List[Union[dm.Mol, str]], y: Optional[list] = None, **fit_params):
+        """Fit the current transformer on given dataset.
+
+        The goal of fitting is for example to identify nan columns values
+        that needs to be removed from the dataset
+
+        Args:
+            X: input list of molecules
+            y (list, optional): Optional list of molecular properties. Defaults to None.
+
+        Returns:
+            self: MolTransformer instance after fitting
+        """
+
+        feats = self.transform(X, ignore_errors=True)
+        lengths = [len(x) for x in feats if not datatype.is_null(x)]
+
+        if lengths:
+            # we will ignore all nan
+            feats = datatype.to_numpy([f for f in feats if not datatype.is_null(f)])
+            # all nan columns
+            unwanted_columns = []
+            # let's adjsut occ to float
+            occ_threshold = self.occ_threshold
+            if occ_threshold > 1:
+                occ_threshold = occ_threshold / feats.shape[0]
+            # not nan
+            unwanted_columns.append(~np.any(np.isnan(feats), axis=0))
+            # not enough set bits
+            unwanted_columns.append(
+                (np.count_nonzero(feats, axis=0) / feats.shape[0]) > occ_threshold
+            )
+            if self.del_invariant:
+                unwanted_columns.append(~np.all(feats == feats[0, :], axis=0))
+            self.cols_to_keep = (np.logical_and.reduce(unwanted_columns)).nonzero()[0]
+        self._fitted = True
+        return self
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __init__(kind='ecfp:4', length=2000, occ_threshold=0, del_invariant=False, n_jobs=1, verbose=False, dtype=np.float32, **params) + +

+ + +
+ +

Molecular to vector featurization with filtering applied

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
kind + str + +
+

Name of the fingerprint (one of the supported fingerprints: see self.AVAILABLE_FPS). Defaults to "ecfp:4".

+
+
+ 'ecfp:4' +
length + int + +
+

Length of the fingerprint. Defaults to 2000.

+
+
+ 2000 +
occ_threshold + float + +
+

Minimum proportion of non-zero entries a column must exceed to be kept (a value above 1 is interpreted as a number of molecules).

+
+
+ 0 +
del_invariant + bool + +
+

Whether to delete columns that are invariant.

+
+
+ False +
n_jobs + int + +
+

Number of jobs. Defaults to 1.

+
+
+ 1 +
verbose + bool + +
+

Verbosity level. Defaults to False.

+
+
+ False +
dtype + Callable + +
+

Data type. Defaults to np.float32.

+
+
+ float32 +
params + dict + +
+

Any additional parameters to the fingerprint function

+
+
+ {} +
+ +
+ Source code in molfeat/trans/fp.py +
176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
def __init__(
+    self,
+    kind: str = "ecfp:4",
+    length: int = 2000,
+    occ_threshold: float = 0,
+    del_invariant: bool = False,
+    n_jobs: int = 1,
+    verbose: bool = False,
+    dtype: Callable = np.float32,
+    **params,
+):
+    """Molecular to vector featurization with filtering applied
+
+    Args:
+        kind (str, optional): Name of the fingerprints (one supported fingerprints: see self.AVAILABLE_FPS). Defaults to "ecfp4".
+        length (int, optional): Length of the fingerprint. Defaults to 2000.
+        occ_threshold (float, optional): Minimum proportion a columns need to be non null to be kept.
+        del_invariant (bool, optional): Whether to delete columns that are invariant.
+        n_jobs (int, optional): Number of jobs. Defaults to 1.
+        verbose (bool, optional): Verbosity level. Defaults to False.
+        dtype (Callable, optional): Data type. Defaults to np.float32.
+        params (dict, optional): Any additional parameters to the fingerprint function
+    """
+
+    super().__init__(
+        kind=kind,
+        length=length,
+        n_jobs=n_jobs,
+        verbose=verbose,
+        dtype=dtype,
+        **params,
+    )
+    self.occ_threshold = occ_threshold
+    self.del_invariant = del_invariant
+    self._input_params.update(occ_threshold=occ_threshold, del_invariant=del_invariant)
+
+
+
+ +
+ + +
+ + + + +

+ fit(X, y=None, **fit_params) + +

+ + +
+ +

Fit the current transformer on given dataset.

+

The goal of fitting is for example to identify nan columns values +that needs to be removed from the dataset

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
X + List[Union[Mol, str]] + +
+

input list of molecules

+
+
+ required +
y + list + +
+

Optional list of molecular properties. Defaults to None.

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
self + +
+

MolTransformer instance after fitting

+
+
+ +
+ Source code in molfeat/trans/fp.py +
234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
def fit(self, X: List[Union[dm.Mol, str]], y: Optional[list] = None, **fit_params):
+    """Fit the current transformer on given dataset.
+
+    The goal of fitting is for example to identify nan columns values
+    that needs to be removed from the dataset
+
+    Args:
+        X: input list of molecules
+        y (list, optional): Optional list of molecular properties. Defaults to None.
+
+    Returns:
+        self: MolTransformer instance after fitting
+    """
+
+    feats = self.transform(X, ignore_errors=True)
+    lengths = [len(x) for x in feats if not datatype.is_null(x)]
+
+    if lengths:
+        # we will ignore all nan
+        feats = datatype.to_numpy([f for f in feats if not datatype.is_null(f)])
+        # all nan columns
+        unwanted_columns = []
+        # let's adjsut occ to float
+        occ_threshold = self.occ_threshold
+        if occ_threshold > 1:
+            occ_threshold = occ_threshold / feats.shape[0]
+        # not nan
+        unwanted_columns.append(~np.any(np.isnan(feats), axis=0))
+        # not enough set bits
+        unwanted_columns.append(
+            (np.count_nonzero(feats, axis=0) / feats.shape[0]) > occ_threshold
+        )
+        if self.del_invariant:
+            unwanted_columns.append(~np.all(feats == feats[0, :], axis=0))
+        self.cols_to_keep = (np.logical_and.reduce(unwanted_columns)).nonzero()[0]
+    self._fitted = True
+    return self
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ FPVecTransformer + + +

+ + +
+

+ Bases: MoleculeTransformer

+ + +

Molecular fingerprinter that computes various fingerprints and descriptors regularly used in QSAR modeling.

+
+

Note

+

For fingerprints with a radius parameter, you can provide the radius using the notation "fp:radius". +For example "Morgan Circular 2" can be written as "morgan:2". Under the hood, morgan and ecfp fingerprints +are equated with the proper radius/diameter adjustment.

+

For counting fingerprints, you just need to add the '-count' suffix to the name of the fingerprint. For example: +"morgan-count:2"

+
+ +
+ Source code in molfeat/trans/fp.py +
 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
class FPVecTransformer(MoleculeTransformer):
+    r"""
+    Molecular fingerprinter that computes various fingerprints and descriptors regularly used in QSAR modeling.
+
+    !!! note
+        For fingerprints with a radius parameter, you can provide the radius using the notation "fp:radius".
+        For example "Morgan Circular 2" can be written as "morgan:2". Under the hood, morgan and ecfp fingerprints
+        are equated with the proper radius/diameter adjustment.
+
+        For counting fingerprints, you just need to add the '-count' suffix to the name of the fingerprint. For example:
+        "morgan-count:2"
+    """
+
+    AVAILABLE_FPS = list(FP_FUNCS.keys()) + [
+        "desc3D",
+        "desc2D",
+        "mordred",
+        "cats2D",
+        "cats3D",
+        "pharm2D",
+        "pharm3D",
+        "scaffoldkeys",
+        "skeys",
+        "electroshape",
+        "usr",
+        "usrcat",
+    ]
+
+    def __init__(
+        self,
+        kind: str = "ecfp:4",
+        length: int = 2000,
+        n_jobs: int = 1,
+        verbose: bool = False,
+        dtype: Callable = np.float32,
+        parallel_kwargs: Optional[dict] = None,
+        **params,
+    ):
+        """Molecular to vector fingerprinter
+
+        Args:
+            kind (str, optional): Name of the fingerprints (one supported fingerprints: see self.AVAILABLE_FPS). Defaults to "ecfp4".
+            length (int, optional): Length of the fingerprint. Defaults to 2000.
+            n_jobs (int, optional): Number of jobs. Defaults to 1.
+            verbose (bool, optional): Verbosity level. Defaults to False.
+            dtype (Callable, optional): Data type. Defaults to np.float32.
+            parallel_kwargs (dict, optional): Optional arguments to pass to dm.parallelized when required. Defaults to None.
+            params (dict, optional): Any additional parameters to the fingerprint function
+        """
+        self._save_input_args()
+
+        # remove any featurizer that was passed as argument
+        params.pop("featurizer", None)
+        self._feat_params = params
+        featurizer = self._prepare_featurizer(kind, length, **params)
+        super().__init__(
+            featurizer=featurizer,
+            n_jobs=n_jobs,
+            verbose=verbose,
+            dtype=dtype,
+            parallel_kwargs=parallel_kwargs,
+            **params,
+        )
+        self.kind = kind
+        self.length = length
+        self._length = None
+        # update length for featurizer that have they fixed length
+        # EN: setting up a protected _length function helps to bypass
+        # the absurd "is" comparison done by sklearn in clone
+        # note that the featurizer length would likely be ignored by featurizer
+        # that do not support a variable length
+        if hasattr(self.featurizer, "__len__"):
+            self._length = len(featurizer)
+        self._input_params.update(kind=kind, length=length)
+        if self.kind.lower() in _UNSERIALIZABLE_FPS:
+            self.parallel_kwargs.update(scheduler="threads")
+
+    def __len__(self):
+        """Compute featurizer length"""
+        if getattr(self, "cols_to_keep", None) is None and self._length is not None:
+            return self._length
+        return super().__len__()
+
+    def _get_param_names(self):
+        """Get parameter names for the estimator"""
+        out = self._input_params.keys()
+        return [x for x in out if x != "featurizer"]
+
+    @classmethod
+    def _prepare_featurizer(cls, kind: str, length: int, **params):
+        """Prepare featurizer from its name and parameters
+
+        Args:
+            kind: Name of the featurizer
+            length: Length of the featurizer
+        Returns:
+            calculator (Callable): fingerprint calculator
+        """
+        match = re.search(r":(\d+)$", kind)
+        radius = None
+        if match is not None:
+            radius = match.group(1)
+        if radius is not None:
+            kind = kind.replace(radius, "").strip(":").lower()
+            radius = int(radius)
+            if any(x in kind for x in ["ecfp", "fcfp"]):
+                radius = max(radius // 2, 1)
+            params["radius"] = radius
+        if any(x in kind for x in ["morgan", "morgan_circular", "morgan-circular"]):
+            kind.replace("_circular", "").replace("-circular", "").replace("morgan", "ecfp")
+        if kind not in cls.AVAILABLE_FPS:
+            raise ValueError(f"{kind} is not a valid featurizer")
+        params["length"] = length
+
+        return get_calculator(kind, **params)
+
+    def _update_params(self):
+        params = copy.deepcopy(self._input_params)
+        params.pop("featurizer", None)
+        params.pop("length", None)
+        params.pop("kind", None)
+        params.pop("verbose", None)
+        params.pop("dtype", None)
+        params.pop("n_jobs", None)
+        self._fitted = False
+        self.featurizer = self._prepare_featurizer(self.kind, self.length, **params)
+
+    def __repr__(self):
+        return "{}(kind={}, length={}, dtype={})".format(
+            self.__class__.__name__,
+            _parse_to_evaluable_str(self.kind),
+            _parse_to_evaluable_str(self.length),
+            _parse_to_evaluable_str(self.dtype),
+        )
+
+    def __str__(self):
+        # The output for the print function
+        return self.__repr__()
+
+    def __eq__(self, other):
+        same_type = type(self) == type(other)
+        return same_type and all(
+            [getattr(other, k) == v for k, v in self.get_params() if not callable(v)]
+        )
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __hash__(self):
+        return hash(repr(self))
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __init__(kind='ecfp:4', length=2000, n_jobs=1, verbose=False, dtype=np.float32, parallel_kwargs=None, **params) + +

+ + +
+ +

Molecular to vector fingerprinter

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
kind + str + +
+

Name of the fingerprint (one of the supported fingerprints: see self.AVAILABLE_FPS). Defaults to "ecfp:4".

+
+
+ 'ecfp:4' +
length + int + +
+

Length of the fingerprint. Defaults to 2000.

+
+
+ 2000 +
n_jobs + int + +
+

Number of jobs. Defaults to 1.

+
+
+ 1 +
verbose + bool + +
+

Verbosity level. Defaults to False.

+
+
+ False +
dtype + Callable + +
+

Data type. Defaults to np.float32.

+
+
+ float32 +
parallel_kwargs + dict + +
+

Optional arguments to pass to dm.parallelized when required. Defaults to None.

+
+
+ None +
params + dict + +
+

Any additional parameters to the fingerprint function

+
+
+ {} +
+ +
+ Source code in molfeat/trans/fp.py +
47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
+88
+89
+90
+91
+92
+93
+94
def __init__(
+    self,
+    kind: str = "ecfp:4",
+    length: int = 2000,
+    n_jobs: int = 1,
+    verbose: bool = False,
+    dtype: Callable = np.float32,
+    parallel_kwargs: Optional[dict] = None,
+    **params,
+):
+    """Molecular to vector fingerprinter
+
+    Args:
+        kind (str, optional): Name of the fingerprint (one of the supported fingerprints: see self.AVAILABLE_FPS).
+            Defaults to "ecfp:4".
+        length (int, optional): Length of the fingerprint. Defaults to 2000.
+        n_jobs (int, optional): Number of jobs. Defaults to 1.
+        verbose (bool, optional): Verbosity level. Defaults to False.
+        dtype (Callable, optional): Data type. Defaults to np.float32.
+        parallel_kwargs (dict, optional): Optional arguments to pass to dm.parallelized when required. Defaults to None.
+        params (dict, optional): Any additional parameters to the fingerprint function
+    """
+    # NOTE(review): presumably records the constructor arguments (implementation not visible
+    # here); kept as the very first statement
+    self._save_input_args()
+
+    # remove any featurizer that was passed as argument
+    params.pop("featurizer", None)
+    self._feat_params = params
+    featurizer = self._prepare_featurizer(kind, length, **params)
+    super().__init__(
+        featurizer=featurizer,
+        n_jobs=n_jobs,
+        verbose=verbose,
+        dtype=dtype,
+        parallel_kwargs=parallel_kwargs,
+        **params,
+    )
+    self.kind = kind
+    self.length = length
+    self._length = None
+    # update length for featurizers that have their own fixed length
+    # EN: setting up a protected _length attribute helps to bypass
+    # the absurd "is" comparison done by sklearn in clone
+    # note that the requested length would likely be ignored by featurizers
+    # that do not support a variable length
+    if hasattr(self.featurizer, "__len__"):
+        self._length = len(featurizer)
+    self._input_params.update(kind=kind, length=length)
+    # fingerprints listed in _UNSERIALIZABLE_FPS are run with the "threads" scheduler
+    if self.kind.lower() in _UNSERIALIZABLE_FPS:
+        self.parallel_kwargs.update(scheduler="threads")
+
+
+
+ +
+ + +
+ + + + +

+ __len__() + +

+ + +
+ +

Compute featurizer length

+ +
+ Source code in molfeat/trans/fp.py +
 96
+ 97
+ 98
+ 99
+100
def __len__(self):
+    """Compute featurizer length"""
+    if getattr(self, "cols_to_keep", None) is None and self._length is not None:
+        return self._length
+    return super().__len__()
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.trans.graph.html b/0.9.7/api/molfeat.trans.graph.html new file mode 100644 index 0000000..a752b7c --- /dev/null +++ b/0.9.7/api/molfeat.trans.graph.html @@ -0,0 +1,6596 @@ + + + + + + + + + + + + + + + + + + + + + + + + + molfeat.trans.graph - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

molfeat.trans.graph

+ +

Graphs

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ AdjGraphTransformer + + +

+ + +
+

+ Bases: GraphTransformer

+ + +

Transforms a molecule into a molecular graph representation formed by an +adjacency matrix of atoms and a set of features for each atom (and potentially bond).

+ +
+ Source code in molfeat/trans/graph/adj.py +
191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
class AdjGraphTransformer(GraphTransformer):
+    r"""
+    Transforms a molecule into a molecular graph representation formed by an
+    adjacency matrix of atoms and a set of features for each atom (and potentially bond).
+    """
+
+    def __init__(
+        self,
+        atom_featurizer: Optional[Callable] = None,
+        bond_featurizer: Optional[Callable] = None,
+        self_loop: bool = False,
+        explicit_hydrogens: bool = False,
+        canonical_atom_order: bool = True,
+        max_n_atoms: Optional[int] = None,
+        n_jobs: int = 1,
+        verbose: bool = False,
+        dtype: Optional[Callable] = None,
+        **params,
+    ):
+        """
+        Adjacency graph transformer
+
+        Args:
+            atom_featurizer: atom featurizer to use
+            bond_featurizer: bond featurizer to use
+            self_loop: whether to add self loops to the adjacency matrix. Your bond featurizer needs to supports this.
+            explicit_hydrogens: Whether to use explicit hydrogen in preprocessing of the input molecule
+            canonical_atom_order: Whether to use a canonical ordering of the atoms
+            max_n_atoms: Maximum number of atom to set the size of the graph
+            n_jobs: Number of job to run in parallel. Defaults to 1.
+            verbose: Verbosity level. Defaults to True.
+            dtype: Output data type. Defaults to None, where numpy arrays are returned.
+        """
+        super().__init__(
+            atom_featurizer=atom_featurizer,
+            bond_featurizer=bond_featurizer,
+            max_n_atoms=max_n_atoms,
+            self_loop=self_loop,
+            n_jobs=n_jobs,
+            verbose=verbose,
+            dtype=dtype,
+            canonical_atom_order=canonical_atom_order,
+            explicit_hydrogens=explicit_hydrogens,
+            **params,
+        )
+
+    def _graph_featurizer(self, mol: dm.Mol):
+        """Internal adjacency graph featurizer
+
+        Returns:
+            mat : N,N matrix representing the graph
+        """
+        adj_mat = GetAdjacencyMatrix(mol)
+        if self.self_loop:
+            np.fill_diagonal(adj_mat, 1)
+        return adj_mat
+
+    @staticmethod
+    def _collate_batch(batch, max_n_atoms=None, pack=False):
+        """
+        Collate a batch of samples. Expected format is either single graphs, e.g. a list of tuples of the form (adj, feats),
+        or graphs together with their labels, where each sample is of the form ((adj, feats), label).
+
+        Args:
+             batch: list
+                Batch of samples.
+             max_n_atoms: Max num atoms in graphs.
+             pack: Whether the graph should be packed or not into a supergraph.
+
+        Returns:
+            Collated samples.
+
+        """
+        if isinstance(batch[0], (list, tuple)) and len(batch[0]) > 2:
+            graphs, feats, labels = map(list, zip(*batch))
+            batched_graph = AdjGraphTransformer._collate_graphs(
+                zip(graphs, feats), max_n_atoms=max_n_atoms, pack=pack
+            )
+
+            if torch.is_tensor(labels[0]):
+                return batched_graph, torch.stack(labels)
+            else:
+                return batched_graph, labels
+
+        # Otherwise we assume the batch is composed of single graphs.
+        return AdjGraphTransformer._collate_graphs(batch, max_n_atoms=max_n_atoms, pack=pack)
+
+    @staticmethod
+    def _collate_graphs(batch, max_n_atoms, pack):
+        if not all([len(b) == 2 for b in batch]):
+            raise ValueError("Default collate function only supports pair of (Graph, AtomFeats) ")
+
+        graphs, feats = zip(*batch)
+        # in case someone does not convert to tensor and wants to use collate
+        # who would do that ?
+        graphs = [datatype.to_tensor(g) for g in graphs]
+        feats = [datatype.to_tensor(f) for f in feats]
+        if pack:
+            return pack_graph(graphs, feats)
+        else:
+            if max_n_atoms is None:
+                cur_max_atoms = max([x.shape[0] for x in feats])
+            else:
+                cur_max_atoms = max_n_atoms
+
+            graphs = torch.stack(
+                [
+                    F.pad(
+                        g,
+                        (0, cur_max_atoms - g.shape[0], 0, cur_max_atoms - g.shape[1]),
+                    )
+                    for g in graphs
+                ]
+            )
+            feats = torch.stack([F.pad(f, (0, 0, 0, cur_max_atoms - f.shape[0])) for f in feats])
+        return graphs, feats
+
+    def get_collate_fn(self, pack: bool = False, max_n_atoms: Optional[int] = None):
+        """Get collate function. Adj Graph are collated either through batching
+        or diagonally packing the graph into a super graph. Either a format of (batch, labels) or graph is supported.
+
+        !!! note
+            Edge features are not supported yet in the default collate because
+            there is no straightforward and universal way to collate them
+
+        Args:
+            pack : Whether to pack or batch the graphs.
+            max_n_atoms: Maximum number of node per graph when packing is False.
+                If the graph needs to be packed and it is not set, instance attributes will be used
+        """
+        if self.bond_featurizer is not None:
+            raise ValueError(
+                "Default collate function is not supported for transformer with bond featurizer"
+            )
+        max_n_atoms = max_n_atoms or self.max_n_atoms
+
+        return partial(self._collate_batch, pack=pack, max_n_atoms=max_n_atoms)
+
+    def transform(self, mols: List[Union[dm.Mol, str]], keep_dict: bool = False, **kwargs):
+        r"""
+        Compute the graph featurization for a set of molecules.
+
+        Args:
+            mols: a list containing smiles or mol objects
+            keep_dict: whether to keep atom and bond featurizer as dict or get the underlying data
+            kwargs: arguments to pass to the `super().transform`
+
+         Returns:
+             features: a list of features for each molecule in the input set
+        """
+        features = super().transform(mols, **kwargs)
+        if not keep_dict:
+            out = []
+            for i, feat in enumerate(features):
+                if feat is not None:
+                    graph, nodes, *bonds = feat
+                    if isinstance(nodes, dict):
+                        nodes = nodes[self.atom_featurizer.name]
+                    if len(bonds) > 0 and isinstance(bonds[0], dict):
+                        try:
+                            bonds = bonds[0][self.bond_featurizer.name]
+                            feat = (graph, nodes, bonds)
+                        except KeyError as e:
+                            # more information on failure
+                            logger.error("Encountered Molecule without bonds")
+                            raise e
+                    else:
+                        feat = (graph, nodes)
+                out.append(feat)
+            features = out
+        return features
+
+    def _transform(self, mol: dm.Mol):
+        r"""
+        Transforms a molecule into an Adjacency graph with a set of atom and optional bond features
+
+        Args:
+            mol: molecule to transform into features
+
+        Returns
+            feat: featurized input molecule (adj_mat, node_feat) or (adj_mat, node_feat, edge_feat)
+
+        """
+        if mol is None:
+            return None
+
+        try:
+            adj_matrix = datatype.cast(self._graph_featurizer(mol), dtype=self.dtype)
+            atom_data = self.atom_featurizer(mol, dtype=self.dtype)
+            feats = (adj_matrix, atom_data)
+            bond_data = None
+            if self.bond_featurizer is not None:
+                bond_data = self.bond_featurizer(mol, flat=False, dtype=self.dtype)
+                feats = (
+                    adj_matrix,
+                    atom_data,
+                    bond_data,
+                )
+        except Exception as e:
+            if self.verbose:
+                logger.error(e)
+            feats = None
+        return feats
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __init__(atom_featurizer=None, bond_featurizer=None, self_loop=False, explicit_hydrogens=False, canonical_atom_order=True, max_n_atoms=None, n_jobs=1, verbose=False, dtype=None, **params) + +

+ + +
+ +

Adjacency graph transformer

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
atom_featurizer + Optional[Callable] + +
+

atom featurizer to use

+
+
+ None +
bond_featurizer + Optional[Callable] + +
+

bond featurizer to use

+
+
+ None +
self_loop + bool + +
+

whether to add self loops to the adjacency matrix. Your bond featurizer needs to support this.

+
+
+ False +
explicit_hydrogens + bool + +
+

Whether to use explicit hydrogen in preprocessing of the input molecule

+
+
+ False +
canonical_atom_order + bool + +
+

Whether to use a canonical ordering of the atoms

+
+
+ True +
max_n_atoms + Optional[int] + +
+

Maximum number of atoms used to set the size of the graph

+
+
+ None +
n_jobs + int + +
+

Number of jobs to run in parallel. Defaults to 1.

+
+
+ 1 +
verbose + bool + +
+

Verbosity level. Defaults to False.

+
+
+ False +
dtype + Optional[Callable] + +
+

Output data type. Defaults to None, where numpy arrays are returned.

+
+
+ None +
+ +
+ Source code in molfeat/trans/graph/adj.py +
197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
def __init__(
+    self,
+    atom_featurizer: Optional[Callable] = None,
+    bond_featurizer: Optional[Callable] = None,
+    self_loop: bool = False,
+    explicit_hydrogens: bool = False,
+    canonical_atom_order: bool = True,
+    max_n_atoms: Optional[int] = None,
+    n_jobs: int = 1,
+    verbose: bool = False,
+    dtype: Optional[Callable] = None,
+    **params,
+):
+    """
+    Adjacency graph transformer
+
+    Args:
+        atom_featurizer: atom featurizer to use
+        bond_featurizer: bond featurizer to use
+        self_loop: whether to add self loops to the adjacency matrix. Your bond featurizer needs to supports this.
+        explicit_hydrogens: Whether to use explicit hydrogen in preprocessing of the input molecule
+        canonical_atom_order: Whether to use a canonical ordering of the atoms
+        max_n_atoms: Maximum number of atom to set the size of the graph
+        n_jobs: Number of job to run in parallel. Defaults to 1.
+        verbose: Verbosity level. Defaults to True.
+        dtype: Output data type. Defaults to None, where numpy arrays are returned.
+    """
+    super().__init__(
+        atom_featurizer=atom_featurizer,
+        bond_featurizer=bond_featurizer,
+        max_n_atoms=max_n_atoms,
+        self_loop=self_loop,
+        n_jobs=n_jobs,
+        verbose=verbose,
+        dtype=dtype,
+        canonical_atom_order=canonical_atom_order,
+        explicit_hydrogens=explicit_hydrogens,
+        **params,
+    )
+
+
+
+ +
+ + +
+ + + + +

+ get_collate_fn(pack=False, max_n_atoms=None) + +

+ + +
+ +

Get collate function. Adj Graph are collated either through batching +or diagonally packing the graph into a super graph. Either a format of (batch, labels) or graph is supported.

+
+

Note

+

Edge features are not supported yet in the default collate because +there is no straightforward and universal way to collate them

+
+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
pack + +
+

Whether to pack or batch the graphs.

+
+
+ False +
max_n_atoms + Optional[int] + +
+

Maximum number of nodes per graph when packing is False. +If the graph needs to be packed and it is not set, the instance attribute will be used.

+
+
+ None +
+ +
+ Source code in molfeat/trans/graph/adj.py +
308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
def get_collate_fn(self, pack: bool = False, max_n_atoms: Optional[int] = None):
+    """Get collate function. Adj Graph are collated either through batching
+    or diagonally packing the graph into a super graph. Either a format of (batch, labels) or graph is supported.
+
+    !!! note
+        Edge features are not supported yet in the default collate because
+        there is no straightforward and universal way to collate them
+
+    Args:
+        pack : Whether to pack or batch the graphs.
+        max_n_atoms: Maximum number of node per graph when packing is False.
+            If the graph needs to be packed and it is not set, instance attributes will be used
+    """
+    if self.bond_featurizer is not None:
+        raise ValueError(
+            "Default collate function is not supported for transformer with bond featurizer"
+        )
+    max_n_atoms = max_n_atoms or self.max_n_atoms
+
+    return partial(self._collate_batch, pack=pack, max_n_atoms=max_n_atoms)
+
+
+
+ +
+ + +
+ + + + +

+ transform(mols, keep_dict=False, **kwargs) + +

+ + +
+ +

Compute the graph featurization for a set of molecules.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + List[Union[Mol, str]] + +
+

a list containing smiles or mol objects

+
+
+ required +
keep_dict + bool + +
+

whether to keep atom and bond featurizer as dict or get the underlying data

+
+
+ False +
kwargs + +
+

arguments to pass to the super().transform

+
+
+ {} +
+

Returns: + features: a list of features for each molecule in the input set

+ +
+ Source code in molfeat/trans/graph/adj.py +
329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
def transform(self, mols: List[Union[dm.Mol, str]], keep_dict: bool = False, **kwargs):
+    r"""
+    Compute the graph featurization for a set of molecules.
+
+    Args:
+        mols: a list containing smiles or mol objects
+        keep_dict: whether to keep atom and bond featurizer as dict or get the underlying data
+        kwargs: arguments to pass to the `super().transform`
+
+     Returns:
+         features: a list of features for each molecule in the input set
+    """
+    features = super().transform(mols, **kwargs)
+    if not keep_dict:
+        out = []
+        for i, feat in enumerate(features):
+            if feat is not None:
+                graph, nodes, *bonds = feat
+                if isinstance(nodes, dict):
+                    nodes = nodes[self.atom_featurizer.name]
+                if len(bonds) > 0 and isinstance(bonds[0], dict):
+                    try:
+                        bonds = bonds[0][self.bond_featurizer.name]
+                        feat = (graph, nodes, bonds)
+                    except KeyError as e:
+                        # more information on failure
+                        logger.error("Encountered Molecule without bonds")
+                        raise e
+                else:
+                    feat = (graph, nodes)
+            out.append(feat)
+        features = out
+    return features
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ CompleteGraphTransformer + + +

+ + +
+

+ Bases: GraphTransformer

+ + +

Transforms a molecule into a complete graph

+ +
+ Source code in molfeat/trans/graph/adj.py +
396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
class CompleteGraphTransformer(GraphTransformer):
+    """Transforms a molecule into a complete graph"""
+
+    def _graph_featurizer(self, mol: dm.Mol):
+        """Complete grah featurizer
+
+        Args:
+            mol: molecule to transform into a graph
+
+        Returns:
+            mat : N,N matrix representing the graph
+        """
+        n_atoms = mol.GetNumAtoms()
+        adj_mat = np.ones((n_atoms, n_atoms))
+        if not self.self_loop:
+            np.fill_diagonal(adj_mat, 0)
+        return adj_mat
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + + + +

+ DGLGraphTransformer + + +

+ + +
+

+ Bases: GraphTransformer

+ + +

Transforms a molecule into a molecular graph representation formed by an +adjacency matrix of atoms and a set of features for each atom (and potentially bond).

+ +
+ Source code in molfeat/trans/graph/adj.py +
455
+456
+457
+458
+459
+460
+461
+462
+463
+464
+465
+466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
+480
+481
+482
+483
+484
+485
+486
+487
+488
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498
+499
+500
+501
+502
+503
+504
+505
+506
+507
+508
+509
+510
+511
+512
+513
+514
+515
+516
+517
+518
+519
+520
+521
+522
+523
+524
+525
+526
+527
+528
+529
+530
+531
+532
+533
+534
+535
+536
+537
+538
+539
+540
+541
+542
+543
+544
+545
+546
+547
+548
+549
+550
+551
+552
+553
+554
+555
+556
+557
+558
+559
+560
+561
+562
+563
+564
+565
+566
+567
+568
+569
+570
+571
+572
+573
+574
+575
+576
+577
+578
+579
+580
+581
+582
+583
+584
+585
+586
+587
+588
+589
+590
+591
+592
+593
+594
+595
+596
+597
+598
+599
+600
+601
+602
+603
+604
+605
+606
+607
+608
+609
+610
+611
+612
+613
+614
+615
+616
+617
+618
+619
+620
+621
+622
+623
+624
+625
+626
+627
+628
+629
+630
+631
+632
+633
+634
+635
+636
+637
+638
+639
+640
+641
+642
+643
+644
+645
+646
+647
+648
+649
+650
+651
+652
+653
+654
+655
+656
+657
+658
class DGLGraphTransformer(GraphTransformer):
+    r"""
+    Transforms a molecule into a molecular graph representation formed by an
+    adjacency matrix of atoms and a set of features for each atom (and potentially bond).
+    """
+
+    def __init__(
+        self,
+        atom_featurizer: Optional[Callable] = None,
+        bond_featurizer: Optional[Callable] = None,
+        self_loop: bool = False,
+        explicit_hydrogens: bool = False,
+        canonical_atom_order: bool = True,
+        complete_graph: bool = False,
+        num_virtual_nodes: int = 0,
+        n_jobs: int = 1,
+        verbose: bool = False,
+        dtype: Optional[Callable] = None,
+        **params,
+    ):
+        """
+        Adjacency graph transformer
+
+        Args:
+           atom_featurizer: atom featurizer to use
+           bond_featurizer: atom featurizer to use
+           self_loop: whether to use self loop or not
+           explicit_hydrogens: Whether to use explicit hydrogen in preprocessing of the input molecule
+           canonical_atom_order: Whether to use a canonical ordering of the atoms
+           complete_graph: Whether to use a complete graph constructor or not
+           num_virtual_nodes: number of virtual nodes to add
+           n_jobs: Number of job to run in parallel. Defaults to 1.
+           verbose: Verbosity level. Defaults to True.
+           dtype: Output data type. Defaults to None, where numpy arrays are returned.
+        """
+
+        super().__init__(
+            atom_featurizer=atom_featurizer,
+            bond_featurizer=bond_featurizer,
+            n_jobs=n_jobs,
+            self_loop=self_loop,
+            num_virtual_nodes=num_virtual_nodes,
+            complete_graph=complete_graph,
+            verbose=verbose,
+            dtype=dtype,
+            canonical_atom_order=canonical_atom_order,
+            explicit_hydrogens=explicit_hydrogens,
+            **params,
+        )
+
+        if not requires.check("dgllife"):
+            logger.error(
+                "Cannot find dgllife. It's required for some features. Please install it first !"
+            )
+        if not requires.check("dgl"):
+            raise ValueError("Cannot find dgl, please install it first !")
+        if self.dtype is not None and not datatype.is_dtype_tensor(self.dtype):
+            raise ValueError("DGL featurizer only supports torch tensors currently")
+
+    def auto_self_loop(self):
+        """Patch the featurizer to auto support self loop based on the bond featurizer characteristics"""
+        super().auto_self_loop()
+        if isinstance(self.bond_featurizer, EdgeMatCalculator):
+            self.self_loop = True
+
+    def get_collate_fn(self, *args, **kwargs):
+        """Return DGL collate function for a batch of molecular graph"""
+        return self._dgl_collate
+
+    @staticmethod
+    def _dgl_collate(batch):
+        """
+        Batch of samples to be used with the featurizer. A sample of the batch is expected to
+        be of the form (graph, label) or simply a graph.
+
+        Args:
+         batch: list
+            batch of samples.
+
+        returns:
+            Batched lists of graphs and labels
+        """
+        if isinstance(batch[0], (list, tuple)):
+            graphs, labels = map(list, zip(*batch))
+            batched_graph = dgl.batch(graphs)
+
+            if torch.is_tensor(labels[0]):
+                return batched_graph, torch.stack(labels)
+            else:
+                return batched_graph, labels
+
+        # Otherwise we assume the batch is composed of single graphs.
+        return dgl.batch(batch)
+
+    def _graph_featurizer(self, mol: dm.Mol):
+        """Convert a molecule to a DGL graph.
+
+        This only supports the bigraph and not any virtual nodes or complete graph.
+
+        Args:
+            mol (dm.Mol): molecule to transform into features
+
+        Returns:
+            graph (dgl.DGLGraph): graph built with dgl
+        """
+
+        n_atoms = mol.GetNumAtoms()
+        num_bonds = mol.GetNumBonds()
+        graph = dgl.graph()
+        graph.add_nodes(n_atoms)
+        bond_src = []
+        bond_dst = []
+        for i in range(num_bonds):
+            bond = mol.GetBondWithIdx(i)
+            begin_idx = bond.GetBeginAtom().GetIdx()
+            end_idx = bond.GetEndAtom().GetIdx()
+            bond_src.append(begin_idx)
+            bond_dst.append(end_idx)
+            # set up the reverse direction
+            bond_src.append(end_idx)
+            bond_dst.append(begin_idx)
+
+        if self.self_loop:
+            nodes = graph.nodes().tolist()
+            bond_src.extend(nodes)
+            bond_dst.extend(nodes)
+
+        graph.add_edges(bond_src, bond_dst)
+        return graph
+
+    @property
+    def atom_dim(self):
+        return super(DGLGraphTransformer, self).atom_dim + int(self.num_virtual_nodes > 0)
+
+    @property
+    def bond_dim(self):
+        return super(DGLGraphTransformer, self).bond_dim + int(self.num_virtual_nodes > 0)
+
+    def _transform(self, mol: dm.Mol):
+        r"""
+        Transforms a molecule into an Adjacency graph with a set of atom and bond features
+
+        Args:
+            mol (dm.Mol): molecule to transform into features
+
+        Returns
+            graph (dgl.DGLGraph): a dgl graph containing atoms and bond data
+
+        """
+        if mol is None:
+            return None
+
+        graph = None
+        if requires.check("dgllife"):
+            graph_featurizer = dgllife_utils.mol_to_bigraph
+
+            if self.complete_graph:
+                graph_featurizer = dgllife_utils.mol_to_complete_graph
+            try:
+                graph = graph_featurizer(
+                    mol,
+                    add_self_loop=self.self_loop,
+                    node_featurizer=self.__recast(self.atom_featurizer),
+                    edge_featurizer=self.__recast(self.bond_featurizer),
+                    canonical_atom_order=self.canonical_atom_order,
+                    explicit_hydrogens=self.explicit_hydrogens,
+                    num_virtual_nodes=self.num_virtual_nodes,
+                )
+            except Exception as e:
+                if self.verbose:
+                    logger.error(e)
+                graph = None
+
+        elif requires.check("dgl") and not self.complete_graph:
+            # we need to build the graph ourselves.
+            graph = self._graph_featurizer(mol)
+            if self.atom_featurizer is not None:
+                graph.ndata.update(self.atom_featurizer(mol, dtype=self.dtype))
+
+            if self.bond_featurizer is not None:
+                graph.edata.update(self.bond_featurizer(mol, dtype=self.dtype))
+
+        else:
+            raise ValueError(
+                "Incorrect setup, please install missing packages (dgl, dgllife) for more features"
+            )
+        return graph
+
+    def __recast(self, featurizer: Callable):
+        """Recast the output of a featurizer to the transformer underlying type
+
+        Args:
+            featurizer: featurizer to patch
+        """
+        if featurizer is None:
+            return None
+        dtype = self.dtype or torch.float
+
+        def patch_feats(*args, **kwargs):
+            out_dict = featurizer(*args, **kwargs)
+            out_dict = {k: datatype.cast(val, dtype=dtype) for k, val in out_dict.items()}
+            return out_dict
+
+        return patch_feats
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __init__(atom_featurizer=None, bond_featurizer=None, self_loop=False, explicit_hydrogens=False, canonical_atom_order=True, complete_graph=False, num_virtual_nodes=0, n_jobs=1, verbose=False, dtype=None, **params) + +

+ + +
+ +

Adjacency graph transformer

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
atom_featurizer + Optional[Callable] + +
+

atom featurizer to use

+
+
+ None +
bond_featurizer + Optional[Callable] + +
+

bond featurizer to use

+
+
+ None +
self_loop + bool + +
+

whether to use self loop or not

+
+
+ False +
explicit_hydrogens + bool + +
+

Whether to use explicit hydrogen in preprocessing of the input molecule

+
+
+ False +
canonical_atom_order + bool + +
+

Whether to use a canonical ordering of the atoms

+
+
+ True +
complete_graph + bool + +
+

Whether to use a complete graph constructor or not

+
+
+ False +
num_virtual_nodes + int + +
+

number of virtual nodes to add

+
+
+ 0 +
n_jobs + int + +
+

Number of jobs to run in parallel. Defaults to 1.

+
+
+ 1 +
verbose + bool + +
+

Verbosity level. Defaults to False.

+
+
+ False +
dtype + Optional[Callable] + +
+

Output data type. Defaults to None. Only torch tensor dtypes are supported; with dgllife, None falls back to torch.float.

+
+
+ None +
+ +
+ Source code in molfeat/trans/graph/adj.py +
461
+462
+463
+464
+465
+466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
+480
+481
+482
+483
+484
+485
+486
+487
+488
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498
+499
+500
+501
+502
+503
+504
+505
+506
+507
+508
+509
+510
+511
+512
def __init__(
+    self,
+    atom_featurizer: Optional[Callable] = None,
+    bond_featurizer: Optional[Callable] = None,
+    self_loop: bool = False,
+    explicit_hydrogens: bool = False,
+    canonical_atom_order: bool = True,
+    complete_graph: bool = False,
+    num_virtual_nodes: int = 0,
+    n_jobs: int = 1,
+    verbose: bool = False,
+    dtype: Optional[Callable] = None,
+    **params,
+):
+    """
+    Adjacency graph transformer
+
+    Args:
+       atom_featurizer: atom featurizer to use
+       bond_featurizer: atom featurizer to use
+       self_loop: whether to use self loop or not
+       explicit_hydrogens: Whether to use explicit hydrogen in preprocessing of the input molecule
+       canonical_atom_order: Whether to use a canonical ordering of the atoms
+       complete_graph: Whether to use a complete graph constructor or not
+       num_virtual_nodes: number of virtual nodes to add
+       n_jobs: Number of job to run in parallel. Defaults to 1.
+       verbose: Verbosity level. Defaults to True.
+       dtype: Output data type. Defaults to None, where numpy arrays are returned.
+    """
+
+    super().__init__(
+        atom_featurizer=atom_featurizer,
+        bond_featurizer=bond_featurizer,
+        n_jobs=n_jobs,
+        self_loop=self_loop,
+        num_virtual_nodes=num_virtual_nodes,
+        complete_graph=complete_graph,
+        verbose=verbose,
+        dtype=dtype,
+        canonical_atom_order=canonical_atom_order,
+        explicit_hydrogens=explicit_hydrogens,
+        **params,
+    )
+
+    if not requires.check("dgllife"):
+        logger.error(
+            "Cannot find dgllife. It's required for some features. Please install it first !"
+        )
+    if not requires.check("dgl"):
+        raise ValueError("Cannot find dgl, please install it first !")
+    if self.dtype is not None and not datatype.is_dtype_tensor(self.dtype):
+        raise ValueError("DGL featurizer only supports torch tensors currently")
+
+
+
+ +
+ + +
+ + + + +

+ __recast(featurizer) + +

+ + +
+ +

Recast the output of a featurizer to the transformer underlying type

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
featurizer + Callable + +
+

featurizer to patch

+
+
+ required +
+ +
+ Source code in molfeat/trans/graph/adj.py +
643
+644
+645
+646
+647
+648
+649
+650
+651
+652
+653
+654
+655
+656
+657
+658
def __recast(self, featurizer: Callable):
+    """Recast the output of a featurizer to the transformer underlying type
+
+    Args:
+        featurizer: featurizer to patch
+    """
+    if featurizer is None:
+        return None
+    dtype = self.dtype or torch.float
+
+    def patch_feats(*args, **kwargs):
+        out_dict = featurizer(*args, **kwargs)
+        out_dict = {k: datatype.cast(val, dtype=dtype) for k, val in out_dict.items()}
+        return out_dict
+
+    return patch_feats
+
+
+
+ +
+ + +
+ + + + +

+ auto_self_loop() + +

+ + +
+ +

Patch the featurizer to auto support self loop based on the bond featurizer characteristics

+ +
+ Source code in molfeat/trans/graph/adj.py +
514
+515
+516
+517
+518
def auto_self_loop(self):
+    """Patch the featurizer to auto support self loop based on the bond featurizer characteristics"""
+    super().auto_self_loop()
+    if isinstance(self.bond_featurizer, EdgeMatCalculator):
+        self.self_loop = True
+
+
+
+ +
+ + +
+ + + + +

+ get_collate_fn(*args, **kwargs) + +

+ + +
+ +

Return DGL collate function for a batch of molecular graph

+ +
+ Source code in molfeat/trans/graph/adj.py +
520
+521
+522
def get_collate_fn(self, *args, **kwargs):
+    """Return DGL collate function for a batch of molecular graph"""
+    return self._dgl_collate
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ DistGraphTransformer3D + + +

+ + +
+

+ Bases: AdjGraphTransformer

+ + +

Graph featurizer using the 3D distance between pairs of atoms for the adjacency matrix. +The self_loop attribute is ignored here as the distance between an atom and itself is 0.

+ +
+ Source code in molfeat/trans/graph/adj.py +
435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
+449
+450
+451
+452
class DistGraphTransformer3D(AdjGraphTransformer):
+    """
+    Graph featurizer using the 3D distance between pair of atoms for the adjacency matrix
+    The `self_loop` attribute is ignored here as the distance between an atom and itself is 0.
+
+    """
+
+    @requires_conformer
+    def _graph_featurizer(self, mol: dm.Mol):
+        """Graph topological distance featurizer
+
+        Args:
+            mol: molecule to transform into a graph
+
+        Returns:
+            mat : N,N matrix representing the graph
+        """
+        return Get3DDistanceMatrix(mol)
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ +
+ + + + +

+ GraphTransformer + + +

+ + +
+

+ Bases: MoleculeTransformer

+ + +

Base class for all graph transformers including DGL

+ +
+ Source code in molfeat/trans/graph/adj.py +
 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
class GraphTransformer(MoleculeTransformer):
+    """
+    Base class for all graph transformers including DGL
+    """
+
+    def __init__(
+        self,
+        atom_featurizer: Optional[Callable] = None,
+        bond_featurizer: Optional[Callable] = None,
+        explicit_hydrogens: bool = False,
+        canonical_atom_order: bool = True,
+        self_loop: bool = False,
+        n_jobs: int = 1,
+        verbose: bool = False,
+        dtype: Optional[Callable] = None,
+        **params,
+    ):
+        """Mol to Graph transformer base class
+
+        Args:
+            atom_featurizer: atom featurizer to use
+            bond_featurizer: atom featurizer to use
+            explicit_hydrogens: Whether to use explicit hydrogen in preprocessing of the input molecule
+            canonical_atom_order: Whether to use a canonical ordering of the atoms
+            self_loop: Whether to add self loops or not
+            n_jobs: Number of job to run in parallel. Defaults to 1.
+            verbose: Verbosity level. Defaults to True.
+            dtype: Output data type. Defaults to None
+        """
+
+        self._save_input_args()
+
+        super().__init__(
+            n_jobs=n_jobs,
+            verbose=verbose,
+            dtype=dtype,
+            featurizer="none",
+            self_loop=self_loop,
+            canonical_atom_order=canonical_atom_order,
+            explicit_hydrogens=explicit_hydrogens,
+            **params,
+        )
+        if atom_featurizer is None:
+            atom_featurizer = AtomCalculator()
+        self.atom_featurizer = atom_featurizer
+        self.bond_featurizer = bond_featurizer
+        self._atom_dim = None
+        self._bond_dim = None
+
+    def auto_self_loop(self):
+        """Patch the featurizer to auto support self loop based on the bond featurizer characteristics"""
+        bf_self_loop = None
+        if self.bond_featurizer is not None:
+            bf_self_loop = getattr(self.bond_featurizer, "self_loop", None)
+            bf_self_loop = bf_self_loop or getattr(self.bond_featurizer, "_self_loop", None)
+        if bf_self_loop is not None:
+            self.self_loop = bf_self_loop
+
+    def preprocess(self, inputs, labels=None):
+        """Preprocess list of input molecules
+        Args:
+            labels: For compatibility
+        """
+        inputs, labels = super().preprocess(inputs, labels)
+        new_inputs = []
+        for m in inputs:
+            try:
+                mol = dm.to_mol(
+                    m, add_hs=self.explicit_hydrogens, ordered=self.canonical_atom_order
+                )
+            except Exception:
+                mol = None
+            new_inputs.append(mol)
+
+        return new_inputs, labels
+
+    def fit(self, **fit_params):
+        """Fit the current transformer on given dataset."""
+        if self.verbose:
+            logger.error("GraphTransformer featurizers cannot be fitted !")
+        return self
+
+    @property
+    def atom_dim(self):
+        r"""
+        Get the number of features per atom
+
+        Returns:
+            atom_dim (int): Number of atom features
+        """
+        if self._atom_dim is None:
+            try:
+                self._atom_dim = len(self.atom_featurizer)
+            except Exception:
+                _toy_mol = dm.to_mol("C")
+                out = self.atom_featurizer(_toy_mol)
+                self._atom_dim = sum([x.shape[-1] for x in out.values()])
+        return self._atom_dim
+
+    @property
+    def bond_dim(self):
+        r"""
+        Get the number of features for a bond
+
+        Returns:
+            bond_dim (int): Number of bond features
+        """
+        if self.bond_featurizer is None:
+            self._bond_dim = 0
+        if self._bond_dim is None:
+            try:
+                self._bond_dim = len(self.bond_featurizer)
+            except Exception:
+                _toy_mol = dm.to_mol("CO")
+                out = self.bond_featurizer(_toy_mol)
+                self._bond_dim = sum([x.shape[-1] for x in out.values()])
+        return self._bond_dim
+
+    def _transform(self, mol: dm.Mol):
+        r"""
+        Compute features for a single molecule.
+        This method would potentially need to be reimplemented by child classes
+
+        Args:
+            mol: molecule to transform into features
+
+        Returns
+            feat: featurized input molecule
+
+        """
+        raise NotImplementedError
+
+    def __call__(self, mols: List[Union[dm.Mol, str]], ignore_errors: bool = False, **kwargs):
+        r"""
+        Calculate features for molecules. Using __call__, instead of transform.
+        Note that most Transfomers allow you to specify
+        a return datatype.
+
+        Args:
+            mols:  Mol or SMILES of the molecules to be transformed
+            ignore_errors: Whether to ignore errors during featurization or raise an error.
+            kwargs: Named parameters for the transform method
+
+        Returns:
+            feats: list of valid features
+            ids: all valid molecule positions that did not failed during featurization
+                Only returned when ignore_errors is True.
+
+        """
+        features = self.transform(mols, ignore_errors=ignore_errors, **kwargs)
+        if not ignore_errors:
+            return features
+        features, ids = self._filter_none(features)
+        return features, ids
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ atom_dim + + + property + + +

+ + +
+ +

Get the number of features per atom

+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
atom_dim + int + +
+

Number of atom features

+
+
+
+ +
+ +
+ + + + +

+ bond_dim + + + property + + +

+ + +
+ +

Get the number of features for a bond

+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
bond_dim + int + +
+

Number of bond features

+
+
+
+ +
+ + + + +
+ + + + +

+ __call__(mols, ignore_errors=False, **kwargs) + +

+ + +
+ +

Calculate features for molecules. Using __call__, instead of transform. +Note that most Transformers allow you to specify +a return datatype.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + List[Union[Mol, str]] + +
+

Mol or SMILES of the molecules to be transformed

+
+
+ required +
ignore_errors + bool + +
+

Whether to ignore errors during featurization or raise an error.

+
+
+ False +
kwargs + +
+

Named parameters for the transform method

+
+
+ {} +
+ + + +

Returns:

+ + + + + + + + + + + + + + + + + +
Name TypeDescription
feats + +
+

list of valid features

+
+
ids + +
+

all valid molecule positions that did not fail during featurization. +Only returned when ignore_errors is True.

+
+
+ +
+ Source code in molfeat/trans/graph/adj.py +
167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
def __call__(self, mols: List[Union[dm.Mol, str]], ignore_errors: bool = False, **kwargs):
+    r"""
+    Calculate features for molecules. Using __call__, instead of transform.
+    Note that most Transfomers allow you to specify
+    a return datatype.
+
+    Args:
+        mols:  Mol or SMILES of the molecules to be transformed
+        ignore_errors: Whether to ignore errors during featurization or raise an error.
+        kwargs: Named parameters for the transform method
+
+    Returns:
+        feats: list of valid features
+        ids: all valid molecule positions that did not failed during featurization
+            Only returned when ignore_errors is True.
+
+    """
+    features = self.transform(mols, ignore_errors=ignore_errors, **kwargs)
+    if not ignore_errors:
+        return features
+    features, ids = self._filter_none(features)
+    return features, ids
+
+
+
+ +
+ + +
+ + + + +

+ __init__(atom_featurizer=None, bond_featurizer=None, explicit_hydrogens=False, canonical_atom_order=True, self_loop=False, n_jobs=1, verbose=False, dtype=None, **params) + +

+ + +
+ +

Mol to Graph transformer base class

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
atom_featurizer + Optional[Callable] + +
+

atom featurizer to use

+
+
+ None +
bond_featurizer + Optional[Callable] + +
+

bond featurizer to use

+
+
+ None +
explicit_hydrogens + bool + +
+

Whether to use explicit hydrogen in preprocessing of the input molecule

+
+
+ False +
canonical_atom_order + bool + +
+

Whether to use a canonical ordering of the atoms

+
+
+ True +
self_loop + bool + +
+

Whether to add self loops or not

+
+
+ False +
n_jobs + int + +
+

Number of jobs to run in parallel. Defaults to 1.

+
+
+ 1 +
verbose + bool + +
+

Verbosity level. Defaults to False.

+
+
+ False +
dtype + Optional[Callable] + +
+

Output data type. Defaults to None

+
+
+ None +
+ +
+ Source code in molfeat/trans/graph/adj.py +
40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
def __init__(
+    self,
+    atom_featurizer: Optional[Callable] = None,
+    bond_featurizer: Optional[Callable] = None,
+    explicit_hydrogens: bool = False,
+    canonical_atom_order: bool = True,
+    self_loop: bool = False,
+    n_jobs: int = 1,
+    verbose: bool = False,
+    dtype: Optional[Callable] = None,
+    **params,
+):
+    """Mol to Graph transformer base class
+
+    Args:
+        atom_featurizer: atom featurizer to use
+        bond_featurizer: bond featurizer to use
+        explicit_hydrogens: Whether to use explicit hydrogen in preprocessing of the input molecule
+        canonical_atom_order: Whether to use a canonical ordering of the atoms
+        self_loop: Whether to add self loops or not
+        n_jobs: Number of job to run in parallel. Defaults to 1.
+        verbose: Verbosity level. Defaults to False.
+        dtype: Output data type. Defaults to None
+    """
+
+    self._save_input_args()
+
+    super().__init__(
+        n_jobs=n_jobs,
+        verbose=verbose,
+        dtype=dtype,
+        featurizer="none",
+        self_loop=self_loop,
+        canonical_atom_order=canonical_atom_order,
+        explicit_hydrogens=explicit_hydrogens,
+        **params,
+    )
+    if atom_featurizer is None:
+        atom_featurizer = AtomCalculator()
+    self.atom_featurizer = atom_featurizer
+    self.bond_featurizer = bond_featurizer
+    self._atom_dim = None
+    self._bond_dim = None
+
+
+
+ +
+ + +
+ + + + +

+ auto_self_loop() + +

+ + +
+ +

Patch the featurizer to auto support self loop based on the bond featurizer characteristics

+ +
+ Source code in molfeat/trans/graph/adj.py +
84
+85
+86
+87
+88
+89
+90
+91
def auto_self_loop(self):
+    """Patch the featurizer to auto support self loop based on the bond featurizer characteristics"""
+    bf_self_loop = None
+    if self.bond_featurizer is not None:
+        bf_self_loop = getattr(self.bond_featurizer, "self_loop", None)
+        bf_self_loop = bf_self_loop or getattr(self.bond_featurizer, "_self_loop", None)
+    if bf_self_loop is not None:
+        self.self_loop = bf_self_loop
+
+
+
+ +
+ + +
+ + + + +

+ fit(**fit_params) + +

+ + +
+ +

Fit the current transformer on given dataset.

+ +
+ Source code in molfeat/trans/graph/adj.py +
111
+112
+113
+114
+115
def fit(self, **fit_params):
+    """Fit the current transformer on given dataset."""
+    if self.verbose:
+        logger.error("GraphTransformer featurizers cannot be fitted !")
+    return self
+
+
+
+ +
+ + +
+ + + + +

+ preprocess(inputs, labels=None) + +

+ + +
+ +

Preprocess list of input molecules +Args: + labels: For compatibility

+ +
+ Source code in molfeat/trans/graph/adj.py +
 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
def preprocess(self, inputs, labels=None):
+    """Preprocess list of input molecules
+    Args:
+        labels: For compatibility
+    """
+    inputs, labels = super().preprocess(inputs, labels)
+    new_inputs = []
+    for m in inputs:
+        try:
+            mol = dm.to_mol(
+                m, add_hs=self.explicit_hydrogens, ordered=self.canonical_atom_order
+            )
+        except Exception:
+            mol = None
+        new_inputs.append(mol)
+
+    return new_inputs, labels
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ PYGGraphTransformer + + +

+ + +
+

+ Bases: AdjGraphTransformer

+ + +

Graph transformer for the PYG models

+ +
+ Source code in molfeat/trans/graph/adj.py +
661
+662
+663
+664
+665
+666
+667
+668
+669
+670
+671
+672
+673
+674
+675
+676
+677
+678
+679
+680
+681
+682
+683
+684
+685
+686
+687
+688
+689
+690
+691
+692
+693
+694
+695
+696
+697
+698
+699
+700
+701
+702
+703
+704
+705
+706
+707
+708
+709
+710
+711
+712
+713
+714
+715
+716
+717
+718
+719
+720
+721
+722
+723
+724
+725
+726
+727
+728
+729
+730
+731
+732
+733
+734
+735
+736
+737
+738
+739
+740
+741
+742
+743
+744
+745
+746
+747
+748
+749
+750
+751
+752
+753
+754
+755
+756
+757
+758
+759
+760
+761
+762
+763
+764
+765
+766
+767
+768
+769
class PYGGraphTransformer(AdjGraphTransformer):
+    """Graph transformer for the PYG models"""
+
+    def _graph_featurizer(self, mol: dm.Mol):
+        # we have used bond_calculator, therefore we need to
+        # go over the molecules and fetch the proper bond info from the atom idx
+        if self.bond_featurizer is None or (
+            isinstance(self.bond_featurizer, EdgeMatCalculator)
+            or hasattr(self.bond_featurizer, "pairwise_atom_funcs")
+        ):
+            graph = super()._graph_featurizer(mol)
+            (rows, cols) = np.nonzero(graph)
+            return np.vstack((rows, cols))
+
+        # we have a regular bond calculator here instead of all pairwise atoms
+        graph = []
+        for i in range(mol.GetNumBonds()):
+            bond = mol.GetBondWithIdx(i)
+            a_idx_1 = bond.GetBeginAtomIdx()
+            a_idx_2 = bond.GetEndAtomIdx()
+            graph += [[a_idx_1, a_idx_2], [a_idx_2, a_idx_1]]
+        if getattr(self.bond_featurizer, "_self_loop", False):
+            graph.extend([[atom_ind, atom_ind] for atom_ind in range(mol.GetNumAtoms())])
+        graph = np.asarray(graph).T
+        return graph
+
+    def _convert_feat_to_data_point(
+        self,
+        graph: np.ndarray,
+        node_feat: np.ndarray,
+        bond_feat: Optional[np.ndarray] = None,
+    ):
+        """Convert extracted graph features to a pyg Data object
+        Args:
+            graph: graph adjacency matrix
+            node_feat: node features
+            bond_feat: bond features
+
+        Returns:
+            datapoint: a pyg Data object
+        """
+        node_feat = torch.tensor(node_feat, dtype=torch.float32)
+        # construct edge index array E of shape (2, n_edges)
+        graph = torch.LongTensor(graph).view(2, -1)
+
+        if bond_feat is not None:
+            bond_feat = torch.tensor(bond_feat, dtype=torch.float32)
+            if bond_feat.ndim == 3:
+                bond_feat = bond_feat[graph[0, :], graph[1, :]]
+
+        d = Data(x=node_feat, edge_index=graph, edge_attr=bond_feat)
+        return d
+
+    def transform(self, mols: List[Union[dm.Mol, str]], **kwargs):
+        r"""
+        Compute the graph featurization for a set of molecules.
+
+        Args:
+            mols: a list containing smiles or mol objects
+            kwargs: arguments to pass to the `super().transform`
+
+         Returns:
+             features: a list of Data point for each molecule in the input set
+        """
+        features = super().transform(mols, keep_dict=False, **kwargs)
+        return [self._convert_feat_to_data_point(*feat) for feat in features]
+
+    def get_collate_fn(
+        self,
+        follow_batch: Optional[List[str]] = None,
+        exclude_keys: Optional[List[str]] = None,
+        return_pair: Optional[bool] = True,
+        **kwargs,
+    ):
+        """
+        Get collate function for pyg graphs
+
+        Args:
+            follow_batch: Creates assignment batch vectors for each key in the list. (default: :obj:`None`)
+            exclude_keys: Will exclude each key in the list. (default: :obj:`None`)
+            return_pair: whether to return a pair of X,y or a databatch (default: :obj:`True`)
+
+        Returns:
+            Collated samples.
+        """
+        collator = Collater(follow_batch=follow_batch, exclude_keys=exclude_keys)
+        return partial(self._collate_batch, collator=collator, return_pair=return_pair)
+
+    @staticmethod
+    def _collate_batch(batch, collator: Callable, return_pair: bool = False, **kwargs):
+        """
+        Collate a batch of samples.
+
+        Args:
+            batch: Batch of samples.
+            collator: collator function
+            return_pair: whether to return a pair of (X,y) or a databatch
+        Returns:
+            Collated samples.
+        """
+        if isinstance(batch[0], (list, tuple)) and len(batch[0]) > 1:
+            graphs, labels = map(list, zip(*batch))
+            for graph, label in zip(graphs, labels):
+                graph.y = label
+            batch = graphs
+        batch = collator(batch)
+        if return_pair:
+            return (batch, batch.y)
+        return batch
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ get_collate_fn(follow_batch=None, exclude_keys=None, return_pair=True, **kwargs) + +

+ + +
+ +

Get collate function for pyg graphs

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
follow_batch + Optional[List[str]] + +
+

Creates assignment batch vectors for each key in the list. (default: :obj:None)

+
+
+ None +
exclude_keys + Optional[List[str]] + +
+

Will exclude each key in the list. (default: :obj:None)

+
+
+ None +
return_pair + Optional[bool] + +
+

whether to return a pair of X,y or a databatch (default: :obj:True)

+
+
+ True +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

Collated samples.

+
+
+ +
+ Source code in molfeat/trans/graph/adj.py +
728
+729
+730
+731
+732
+733
+734
+735
+736
+737
+738
+739
+740
+741
+742
+743
+744
+745
+746
+747
def get_collate_fn(
+    self,
+    follow_batch: Optional[List[str]] = None,
+    exclude_keys: Optional[List[str]] = None,
+    return_pair: Optional[bool] = True,
+    **kwargs,
+):
+    """
+    Get collate function for pyg graphs
+
+    Args:
+        follow_batch: Creates assignment batch vectors for each key in the list. (default: :obj:`None`)
+        exclude_keys: Will exclude each key in the list. (default: :obj:`None`)
+        return_pair: whether to return a pair of X,y or a databatch (default: :obj:`True`)
+
+    Returns:
+        Collated samples.
+    """
+    collator = Collater(follow_batch=follow_batch, exclude_keys=exclude_keys)
+    return partial(self._collate_batch, collator=collator, return_pair=return_pair)
+
+
+
+ +
+ + +
+ + + + +

+ transform(mols, **kwargs) + +

+ + +
+ +

Compute the graph featurization for a set of molecules.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + List[Union[Mol, str]] + +
+

a list containing smiles or mol objects

+
+
+ required +
kwargs + +
+

arguments to pass to the super().transform

+
+
+ {} +
+

Returns: + features: a list of Data point for each molecule in the input set

+ +
+ Source code in molfeat/trans/graph/adj.py +
714
+715
+716
+717
+718
+719
+720
+721
+722
+723
+724
+725
+726
def transform(self, mols: List[Union[dm.Mol, str]], **kwargs):
+    r"""
+    Compute the graph featurization for a set of molecules.
+
+    Args:
+        mols: a list containing smiles or mol objects
+        kwargs: arguments to pass to the `super().transform`
+
+     Returns:
+         features: a list of Data point for each molecule in the input set
+    """
+    features = super().transform(mols, keep_dict=False, **kwargs)
+    return [self._convert_feat_to_data_point(*feat) for feat in features]
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ TopoDistGraphTransformer + + +

+ + +
+

+ Bases: AdjGraphTransformer

+ + +

Graph featurizer using the topological distance between each pair +of nodes instead of the adjacency matrix.

+

The self_loop attribute is ignored here as the distance between an atom and itself is 0.

+ +
+ Source code in molfeat/trans/graph/adj.py +
415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
class TopoDistGraphTransformer(AdjGraphTransformer):
+    """
+    Graph featurizer using the topological distance between each pair
+    of nodes instead of the adjacency matrix.
+
+    The `self_loop` attribute is ignored here as the distance between an atom and itself is 0.
+    """
+
+    def _graph_featurizer(self, mol: dm.Mol):
+        """Graph topological distance featurizer
+
+        Args:
+            mol: molecule to transform into a graph
+
+        Returns:
+            mat : N,N matrix representing the graph
+        """
+        return GetDistanceMatrix(mol)
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + + + +
+ +
+ +

+

Tree

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ MolTreeDecompositionTransformer + + +

+ + +
+

+ Bases: MoleculeTransformer

+ + +

Transforms a molecule into a tree structure whose nodes correspond to different functional groups.

+ +
+ Source code in molfeat/trans/graph/tree.py +
 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
class MolTreeDecompositionTransformer(MoleculeTransformer):
+    r"""
+    Transforms a molecule into a tree structure whose nodes correspond to different functional groups.
+    """
+
+    def __init__(
+        self,
+        vocab: Optional[Iterable] = None,
+        one_hot: bool = False,
+        dtype: Optional[Callable] = None,
+        cache: bool = True,
+        **params,
+    ):
+        """MolTree featurizer
+
+        Args:
+            vocab: List of the smiles of the functional groups or clusters.
+                If None, the transformer should be fitted before any usage.
+            one_hot (bool, optional): Whether or not for a tree a 1d array or a 2d array is returned as features
+                If 1d array, vocabulary elements are mapped into integers,
+                otherwise, vocabulary elements are mapped into one-hot vectors
+            cache: Whether to cache the tree decomposition to avoid recomputing for seen molecules
+            dtype: Output data type. Defaults to None
+
+        Attributes:
+            vocab: Mapping from clusters to integers
+            vocab_size: The number of clusters + 1
+            one_hot: Whether or not for a sequence a 1d array or a 2d array is returned as features
+        """
+
+        self._save_input_args()
+
+        super().__init__(
+            dtype=dtype,
+            one_hot=one_hot,
+            cache=cache,
+            featurizer=TreeDecomposer(cache=cache),
+            **params,
+        )
+        if vocab is not None:
+            self.vocab = vocab
+            self._vocab_size = len(self.vocab) + 1
+            self._fitted = True
+        else:
+            self.vocab = None
+            self._vocab_size = None
+            self._fitted = False
+
+        if not requires.check("dgl"):
+            raise ValueError("dgl is required for this featurizer, please install it first")
+
+        if self.dtype is not None and not datatype.is_dtype_tensor(self.dtype):
+            raise ValueError("DGL featurizer only supports torch tensors currently")
+
+    @property
+    def vocab_size(self):
+        """Compute vocab size of this featurizer
+
+        Returns:
+            size: vocab size
+        """
+        return self._vocab_size
+
+    def fit(
+        self,
+        X: List[Union[dm.Mol, str]],
+        y: Optional[list] = None,
+        output_file: Optional[os.PathLike] = None,
+        **fit_params,
+    ):
+        """Fit the current transformer on given dataset.
+
+        The goal of fitting is for example to identify nan columns values
+        that needs to be removed from the dataset
+
+        Args:
+            X: input list of molecules
+            y (list, optional): Optional list of molecular properties. Defaults to None.
+            output_file: path to a file that will be used to store the generated set of fragments.
+            fit_params: key val of additional fit parameters
+
+
+        Returns:
+            self: MolTransformer instance after fitting
+        """
+        if self.vocab is not None:
+            logger.warning("The previous vocabulary of fragments will be erased.")
+        self.vocab = self.featurizer.get_vocab(X, output_file=output_file, log=self.verbose)
+        self._vocab_size = len(self.vocab) + 1
+        self._fitted = True
+
+        # save the vocab in the state
+        self._input_args["vocab"] = self.vocab
+
+        return self
+
+    def _transform(self, mol: dm.Mol):
+        r"""
+        Compute features for a single molecule.
+        This method would potentially need to be reimplemented by child classes
+
+        Args:
+            mol (dm.Mol): molecule to transform into features
+
+        Returns
+            feat: featurized input molecule
+
+        """
+        if not self._fitted:
+            raise ValueError(
+                "Need to call the fit function before any transformation. \
+                Or provide the fragments vocabulary at the object construction"
+            )
+
+        try:
+            _, edges, fragments = self.featurizer(mol)
+            n_nodes = len(fragments)
+            enc = [self.vocab.index(f) + 1 if f in self.vocab else 0 for f in fragments]
+            enc = datatype.cast(enc, (self.dtype or torch.long))
+            graph = dgl.graph(([], []))
+            graph.add_nodes(n_nodes)
+            for edge in edges:
+                graph.add_edges(*edge)
+                graph.add_edges(*edge[::-1])
+
+            if self.one_hot:
+                enc = [one_hot_encoding(f, self.vocab, encode_unknown=True) for f in fragments]
+                enc = np.asarray(enc)
+                enc = datatype.cast(enc, (self.dtype or torch.float))
+
+            graph.ndata["hv"] = enc
+        except Exception as e:
+            raise e
+            if self.verbose:
+                logger.error(e)
+            graph = None
+        return graph
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ vocab_size + + + property + + +

+ + +
+ +

Compute vocab size of this featurizer

+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
size + +
+

vocab size

+
+
+
+ +
+ + + + +
+ + + + +

+ __init__(vocab=None, one_hot=False, dtype=None, cache=True, **params) + +

+ + +
+ +

MolTree featurizer

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
vocab + Optional[Iterable] + +
+

List of the smiles of the functional groups or clusters. +If None, the transformer should be fitted before any usage.

+
+
+ None +
one_hot + bool + +
+

Whether or not for a tree a 1d array or a 2d array is returned as features +If 1d array, vocabulary elements are mapped into integers, +otherwise, vocabulary elements are mapped into one-hot vectors

+
+
+ False +
cache + bool + +
+

Whether to cache the tree decomposition to avoid recomputing for seen molecules

+
+
+ True +
dtype + Optional[Callable] + +
+

Output data type. Defaults to None

+
+
+ None +
+ + + +

Attributes:

+ + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescription
vocab + +
+

Mapping from clusters to integers

+
+
vocab_size + +
+

The number of clusters + 1

+
+
one_hot + +
+

Whether or not for a sequence a 1d array or a 2d array is returned as features

+
+
+ +
+ Source code in molfeat/trans/graph/tree.py +
27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
def __init__(
+    self,
+    vocab: Optional[Iterable] = None,
+    one_hot: bool = False,
+    dtype: Optional[Callable] = None,
+    cache: bool = True,
+    **params,
+):
+    """MolTree featurizer
+
+    Args:
+        vocab: List of the smiles of the functional groups or clusters.
+            If None, the transformer should be fitted before any usage.
+        one_hot (bool, optional): Whether or not for a tree a 1d array or a 2d array is returned as features
+            If 1d array, vocabulary elements are mapped into integers,
+            otherwise, vocabulary elements are mapped into one-hot vectors
+        cache: Whether to cache the tree decomposition to avoid recomputing for seen molecules
+        dtype: Output data type. Defaults to None
+
+    Attributes:
+        vocab: Mapping from clusters to integers
+        vocab_size: The number of clusters + 1
+        one_hot: Whether or not for a sequence a 1d array or a 2d array is returned as features
+    """
+
+    self._save_input_args()
+
+    super().__init__(
+        dtype=dtype,
+        one_hot=one_hot,
+        cache=cache,
+        featurizer=TreeDecomposer(cache=cache),
+        **params,
+    )
+    if vocab is not None:
+        self.vocab = vocab
+        self._vocab_size = len(self.vocab) + 1
+        self._fitted = True
+    else:
+        self.vocab = None
+        self._vocab_size = None
+        self._fitted = False
+
+    if not requires.check("dgl"):
+        raise ValueError("dgl is required for this featurizer, please install it first")
+
+    if self.dtype is not None and not datatype.is_dtype_tensor(self.dtype):
+        raise ValueError("DGL featurizer only supports torch tensors currently")
+
+
+
+ +
+ + +
+ + + + +

+ fit(X, y=None, output_file=None, **fit_params) + +

+ + +
+ +

Fit the current transformer on given dataset.

+

The goal of fitting is for example to identify nan columns values +that needs to be removed from the dataset

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
X + List[Union[Mol, str]] + +
+

input list of molecules

+
+
+ required +
y + list + +
+

Optional list of molecular properties. Defaults to None.

+
+
+ None +
output_file + Optional[PathLike] + +
+

path to a file that will be used to store the generated set of fragments.

+
+
+ None +
fit_params + +
+

key val of additional fit parameters

+
+
+ {} +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
self + +
+

MolTransformer instance after fitting

+
+
+ +
+ Source code in molfeat/trans/graph/tree.py +
 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
def fit(
+    self,
+    X: List[Union[dm.Mol, str]],
+    y: Optional[list] = None,
+    output_file: Optional[os.PathLike] = None,
+    **fit_params,
+):
+    """Fit the current transformer on given dataset.
+
+    The goal of fitting is for example to identify nan columns values
+    that needs to be removed from the dataset
+
+    Args:
+        X: input list of molecules
+        y (list, optional): Optional list of molecular properties. Defaults to None.
+        output_file: path to a file that will be used to store the generated set of fragments.
+        fit_params: key val of additional fit parameters
+
+
+    Returns:
+        self: MolTransformer instance after fitting
+    """
+    if self.vocab is not None:
+        logger.warning("The previous vocabulary of fragments will be erased.")
+    self.vocab = self.featurizer.get_vocab(X, output_file=output_file, log=self.verbose)
+    self._vocab_size = len(self.vocab) + 1
+    self._fitted = True
+
+    # save the vocab in the state
+    self._input_args["vocab"] = self.vocab
+
+    return self
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.trans.pretrained.base.html b/0.9.7/api/molfeat.trans.pretrained.base.html new file mode 100644 index 0000000..251951a --- /dev/null +++ b/0.9.7/api/molfeat.trans.pretrained.base.html @@ -0,0 +1,2666 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Base Pretrained Models - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

Base Pretrained Models

+ +

Pretrained Model

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ PretrainedMolTransformer + + +

+ + +
+

+ Bases: MoleculeTransformer

+ + +

Transformer based on pretrained featurizer

+
+

Note

+
    +
  • When implementing a subclass of this class, you need to define the _embed and optionally the _convert methods.
  • +
  • If your model is an instance of PretrainedModel that handles loading of the model from a store or through a complex mechanism + then you can decide whether you want to preload the true underlying model. You will be in charge of handling the logic of when you need to call preload, and when you don't. + Note however that by default preloading is only attempted when the featurizer is still an instance of PretrainedModel.
  • +
+
+

Attributes + featurizer (object): featurizer object + dtype (type, optional): Data type. Use call instead + precompute_cache: (bool, optional): Whether to precompute the features into a local cache. Defaults to False. + Note that due to molecular hashing, some pretrained featurizers might be better off just not using any cache as they can be faster. + Furthermore, the cache is not saved when pickling the object. If you want to save the cache, you need to save the object separately. + _require_mols (bool): Whether the embedding takes mols or smiles as input + preload: whether to preload the pretrained model from the store (if available) during initialization.

+ +
+ Source code in molfeat/trans/pretrained/base.py +
 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
class PretrainedMolTransformer(MoleculeTransformer):
+    r"""
+    Transformer based on pretrained featurizer
+
+    !!! note
+        * When implementing a subclass of this class, you need to define the `_embed` and optionally the `_convert` methods.
+        * If your model is an instance of PretrainedModel that handles loading of the model from a store or through a complex mechanism
+          then you can decide whether you want to preload the true underlying model. You will be in charge of handling the logic of when you need to call preload, and when you don't.
+          Note however that by default preloading is only attempted when the featurizer is still an instance of PretrainedModel.
+
+
+    Attributes
+        featurizer (object): featurizer object
+        dtype (type, optional): Data type. Use call instead
+        precompute_cache: (bool, optional): Whether to precompute the features into a local cache. Defaults to False.
+            Note that due to molecular hashing, some pretrained featurizers might be better off just not using any cache as they can be faster.
+            Furthermore, the cache is not saved when pickling the object. If you want to save the cache, you need to save the object separately.
+        _require_mols (bool): Whether the embedding takes mols or smiles as input
+        preload: whether to preload the pretrained model from the store (if available) during initialization.
+
+    """
+
+    def __init__(
+        self,
+        dtype: Optional[Callable] = None,
+        precompute_cache: Optional[Union[bool, DataCache]] = None,
+        preload: bool = False,
+        **params,
+    ):
+        self._save_input_args()
+
+        featurizer = params.pop("featurizer", None)
+        super().__init__(dtype=dtype, featurizer="none", **params)
+        self.featurizer = featurizer
+        self._require_mols = False
+        self.preload = preload
+        self._feat_length = None
+        if precompute_cache is False:
+            precompute_cache = None
+        if precompute_cache is True:
+            name = str(self.__class__.__name__)
+            precompute_cache = DataCache(name=name)
+        self.precompute_cache = precompute_cache
+
+    def set_cache(self, cache: DataCache):
+        """Set the cache for the transformer
+
+        Args:
+            cache: cache object
+        """
+        self.precompute_cache = cache
+
+    def _get_param_names(self):
+        """Get parameter names for the estimator"""
+        out = self._input_params.keys()
+        out = [x for x in out if x != "featurizer"]
+        return out
+
+    def _embed(self, smiles: str, **kwargs):
+        """Compute molecular embeddings for input list of smiles
+        This function takes a list of smiles or molecules and returns the featurization
+        corresponding to the inputs.  In `transform` and `_transform`, this function is
+        called after calling `_convert`
+
+        Args:
+            smiles: input smiles
+        """
+        raise NotImplementedError
+
+    def _preload(self):
+        """Preload the pretrained model for later queries"""
+        if self.featurizer is not None and isinstance(self.featurizer, PretrainedModel):
+            self.featurizer = self.featurizer.load()
+            self.preload = True
+
+    def __getstate__(self):
+        """Getting state to allow pickling"""
+        d = copy.deepcopy(self.__dict__)
+        d["precompute_cache"] = None
+        if isinstance(getattr(self, "featurizer", None), PretrainedModel) or self.preload:
+            d.pop("featurizer", None)
+        return d
+
+    def __setstate__(self, d):
+        """Setting state during reloading pickling"""
+        self.__dict__.update(d)
+        self._update_params()
+
+    def fit(self, *args, **kwargs):
+        return self
+
+    def _convert(self, inputs: list, **kwargs):
+        """Convert molecules to the right format
+
+        In `transform` and `_transform`, this function is called before calling `_embed`
+
+        Args:
+            inputs: inputs to preprocess
+
+        Returns:
+            processed: pre-processed input list
+        """
+        if not self._require_mols:
+            inputs = [dm.to_smiles(m) for m in inputs]
+        return inputs
+
+    def preprocess(self, inputs: list, labels: Optional[list] = None):
+        """Run preprocessing on the input data
+        Args:
+            inputs: list of input molecules
+            labels: list of labels
+        """
+        out = super().preprocess(inputs, labels)
+        if self.precompute_cache not in [False, None]:
+            try:
+                self.transform(inputs)
+            except Exception:
+                pass
+        return out
+
+    def _transform(self, mol: dm.Mol, **kwargs):
+        r"""
+        Compute features for a single molecule.
+        This method would potentially need to be reimplemented by any child class
+
+        Args:
+            mol (dm.Mol): molecule to transform into features
+
+        Returns
+            feat: featurized input molecule
+
+        """
+        feat = None
+        if self.precompute_cache is not None:
+            feat = self.precompute_cache.get(mol)
+        if feat is None:
+            try:
+                mols = [dm.to_mol(mol)]
+                mols = self._convert(mols, **kwargs)
+                feat = self._embed(mols, **kwargs)
+                feat = feat[0]
+            except Exception as e:
+                if self.verbose:
+                    logger.error(e)
+
+            if self.precompute_cache is not None:
+                self.precompute_cache[mol] = feat
+        return feat
+
+    def transform(self, smiles: List[str], **kwargs):
+        """Perform featurization of the input molecules
+
+        The featurization process is as follows:
+        1. convert the input molecules into the right format, expected by the pre-trained model using `_convert`
+        2. compute embedding of the molecule using `_embed`
+        3. perform any model-specific postprocessing and cache update
+
+        The dtype returned is the native datatype of the transformer.
+        Use `__call__` to get the dtype in the `dtype` attribute format
+
+        Args:
+            smiles: a list containing smiles or mol objects
+
+        Returns:
+            out: featurized molecules
+        """
+        if isinstance(smiles, str) or not isinstance(smiles, Iterable):
+            smiles = [smiles]
+
+        n_mols = len(smiles)
+        ind_to_compute = dict(zip(range(n_mols), range(n_mols)))
+        pre_computed = [None] * n_mols
+
+        if self.precompute_cache not in [False, None]:
+            ind_to_compute = {}
+            pre_computed = self.precompute_cache.fetch(smiles)
+            ind = 0
+            for i, v in enumerate(pre_computed):
+                if v is None:
+                    ind_to_compute[i] = ind
+                    ind += 1
+
+        parallel_kwargs = getattr(self, "parallel_kwargs", {})
+        mols = dm.parallelized(
+            dm.to_mol, smiles, n_jobs=getattr(self, "n_jobs", 1), **parallel_kwargs
+        )
+        mols = [mols[i] for i in ind_to_compute]
+
+        if len(mols) > 0:
+            converted_mols = self._convert(mols, **kwargs)
+            out = self._embed(converted_mols, **kwargs)
+
+            if not isinstance(out, list):
+                out = list(out)
+
+            if self.precompute_cache is not None:
+                # cache value now
+                self.precompute_cache.update(dict(zip(mols, out)))
+        out = [
+            out[ind_to_compute[i]] if i in ind_to_compute else pre_computed[i]
+            for i in range(n_mols)
+        ]
+        return datatype.as_numpy_array_if_possible(out, self.dtype)
+
+    def __eq__(self, other):
+        if isinstance(other, self.__class__):
+            return str(self) == str(other)
+        return False
+
+    def _update_params(self):
+        self._fitted = False
+
+    def __len__(self):
+        if self._feat_length is None:
+            self._preload()
+            tmp_mol = dm.to_mol("CCC")
+            embs = self._transform(tmp_mol)
+            self._feat_length = len(embs)
+        return self._feat_length
+
+    def __ne__(self, other):
+        return not (self == other)
+
+    def __hash__(self):
+        return hash(repr(self))
+
+    def __repr__(self):
+        return "{}(dtype={})".format(
+            self.__class__.__name__,
+            _parse_to_evaluable_str(self.dtype),
+        )
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __getstate__() + +

+ + +
+ +

Getting state to allow pickling

+ +
+ Source code in molfeat/trans/pretrained/base.py +
93
+94
+95
+96
+97
+98
+99
def __getstate__(self):
+    """Getting state to allow pickling"""
+    d = copy.deepcopy(self.__dict__)
+    d["precompute_cache"] = None
+    if isinstance(getattr(self, "featurizer", None), PretrainedModel) or self.preload:
+        d.pop("featurizer", None)
+    return d
+
+
+
+ +
+ + +
+ + + + +

+ __setstate__(d) + +

+ + +
+ +

Restore the state when reloading from a pickle

+ +
+ Source code in molfeat/trans/pretrained/base.py +
101
+102
+103
+104
def __setstate__(self, d):
+    """Setting state during reloading pickling"""
+    self.__dict__.update(d)
+    self._update_params()
+
+
+
+ +
+ + +
+ + + + +

+ preprocess(inputs, labels=None) + +

+ + +
+ +

Run preprocessing on the input data +Args: + inputs: list of input molecules + labels: list of labels

+ +
+ Source code in molfeat/trans/pretrained/base.py +
124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
def preprocess(self, inputs: list, labels: Optional[list] = None):
+    """Run preprocessing on the input data
+    Args:
+        inputs: list of input molecules
+        labels: list of labels
+    """
+    out = super().preprocess(inputs, labels)
+    if self.precompute_cache not in [False, None]:
+        try:
+            self.transform(inputs)
+        except Exception:
+            pass
+    return out
+
+
+
+ +
+ + +
+ + + + +

+ set_cache(cache) + +

+ + +
+ +

Set the cache for the transformer

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
cache + DataCache + +
+

cache object

+
+
+ required +
+ +
+ Source code in molfeat/trans/pretrained/base.py +
62
+63
+64
+65
+66
+67
+68
def set_cache(self, cache: DataCache):
+    """Set the cache for the transformer
+
+    Args:
+        cache: cache object
+    """
+    self.precompute_cache = cache
+
+
+
+ +
+ + +
+ + + + +

+ transform(smiles, **kwargs) + +

+ + +
+ +

Perform featurization of the input molecules

+

The featurization process is as follows: +1. convert the input molecules into the right format, expected by the pre-trained model using _convert +2. compute embedding of the molecule using _embed +3. perform any model-specific postprocessing and cache update

+

The dtype returned is the native datatype of the transformer. +Use __call__ to get the dtype in the dtype attribute format

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
smiles + +
+

a list containing smiles or mol objects

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
out + +
+

featurized molecules

+
+
+ +
+ Source code in molfeat/trans/pretrained/base.py +
167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
def transform(self, smiles: List[str], **kwargs):
+    """Perform featurization of the input molecules
+
+    The featurization process is as follow:
+    1. convert the input molecules into the right format, expected by the pre-trained model using `_convert`
+    2. compute embedding of the molecule using `_embed`
+    3. perform any model-specific postprocessing and cache update
+
+    The dtype returned is the native datatype of the transformer.
+    Use `__call__` to get the dtype in the `dtype` attribute format
+
+    Args:
+        mols: a list containing smiles or mol objects
+
+    Returns:
+        out: featurized molecules
+    """
+    if isinstance(smiles, str) or not isinstance(smiles, Iterable):
+        smiles = [smiles]
+
+    n_mols = len(smiles)
+    ind_to_compute = dict(zip(range(n_mols), range(n_mols)))
+    pre_computed = [None] * n_mols
+
+    if self.precompute_cache not in [False, None]:
+        ind_to_compute = {}
+        pre_computed = self.precompute_cache.fetch(smiles)
+        ind = 0
+        for i, v in enumerate(pre_computed):
+            if v is None:
+                ind_to_compute[i] = ind
+                ind += 1
+
+    parallel_kwargs = getattr(self, "parallel_kwargs", {})
+    mols = dm.parallelized(
+        dm.to_mol, smiles, n_jobs=getattr(self, "n_jobs", 1), **parallel_kwargs
+    )
+    mols = [mols[i] for i in ind_to_compute]
+
+    if len(mols) > 0:
+        converted_mols = self._convert(mols, **kwargs)
+        out = self._embed(converted_mols, **kwargs)
+
+        if not isinstance(out, list):
+            out = list(out)
+
+        if self.precompute_cache is not None:
+            # cache value now
+            self.precompute_cache.update(dict(zip(mols, out)))
+    out = [
+        out[ind_to_compute[i]] if i in ind_to_compute else pre_computed[i]
+        for i in range(n_mols)
+    ]
+    return datatype.as_numpy_array_if_possible(out, self.dtype)
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.trans.pretrained.dgl_pretrained.html b/0.9.7/api/molfeat.trans.pretrained.dgl_pretrained.html new file mode 100644 index 0000000..a1c016e --- /dev/null +++ b/0.9.7/api/molfeat.trans.pretrained.dgl_pretrained.html @@ -0,0 +1,3071 @@ + + + + + + + + + + + + + + + + + + + + + + + + + DGL - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+ +
+
+ + + +
+
+ + + + + + + +

DGL

+ +

DGLLife pretrained models

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ DGLModel + + +

+ + +
+

+ Bases: PretrainedStoreModel

+ + +

Load one of the pretrained DGL models for molecular embedding.

+ +
+ Source code in molfeat/trans/pretrained/dgl_pretrained.py +
36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
+88
+89
+90
+91
+92
class DGLModel(PretrainedStoreModel):
+    r"""
+    Load one of the pretrained DGL models for molecular embedding:
+    """
+    AVAILABLE_MODELS = [
+        "gin_supervised_contextpred",
+        "gin_supervised_infomax",
+        "gin_supervised_edgepred",
+        "gin_supervised_masking",
+        "jtvae_zinc_no_kl",
+    ]
+
+    def __init__(
+        self,
+        name: str,
+        cache_path: Optional[os.PathLike] = None,
+        store: Optional[ModelStore] = None,
+    ):
+        super().__init__(name, cache_path=cache_path, store=store)
+        self._model = None
+
+    @classmethod
+    def available_models(cls, query: Optional[str] = None):
+        """List available models
+        Args:
+            query (str, optional): Query to filter the list of available models. Defaults to None.
+        """
+        if query is None:
+            return cls.AVAILABLE_MODELS
+        else:
+            return [x for x in cls.AVAILABLE_MODELS if re.search(query, x, re.IGNORECASE)]
+
+    @classmethod
+    def from_pretrained(cls, model_name: str):
+        """Load pretrained model using the dgllife API and not the store"""
+        if not requires.check("dgllife"):
+            raise ValueError("dgllife is not installed")
+        import dgllife
+
+        base_model = dgllife.model.load_pretrained(model_name)
+        model = DGLModel(name=model_name)
+        model.eval()
+        model._model = base_model
+        return model
+
+    def load(self):
+        """Load GIN model"""
+        if self._model is not None:
+            return self._model
+        download_output_dir = self._artifact_load(
+            name=self.name, download_path=self.cache_path, store=self.store
+        )
+        model_path = dm.fs.join(download_output_dir, self.store.MODEL_PATH_NAME)
+        with fsspec.open(model_path, "rb") as f:
+            model = joblib.load(f)
+        model.eval()
+        return model
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ available_models(query=None) + + + classmethod + + +

+ + +
+ +

List available models +Args: + query (str, optional): Query to filter the list of available models. Defaults to None.

+ +
+ Source code in molfeat/trans/pretrained/dgl_pretrained.py +
57
+58
+59
+60
+61
+62
+63
+64
+65
+66
@classmethod
+def available_models(cls, query: Optional[str] = None):
+    """List available models
+    Args:
+        query (str, optional): Query to filter the list of available models. Defaults to None.
+    """
+    if query is None:
+        return cls.AVAILABLE_MODELS
+    else:
+        return [x for x in cls.AVAILABLE_MODELS if re.search(query, x, re.IGNORECASE)]
+
+
+
+ +
+ + +
+ + + + +

+ from_pretrained(model_name) + + + classmethod + + +

+ + +
+ +

Load pretrained model using the dgllife API and not the store

+ +
+ Source code in molfeat/trans/pretrained/dgl_pretrained.py +
68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
@classmethod
+def from_pretrained(cls, model_name: str):
+    """Load pretrained model using the dgllife API and not the store"""
+    if not requires.check("dgllife"):
+        raise ValueError("dgllife is not installed")
+    import dgllife
+
+    base_model = dgllife.model.load_pretrained(model_name)
+    model = DGLModel(name=model_name)
+    model.eval()
+    model._model = base_model
+    return model
+
+
+
+ +
+ + +
+ + + + +

+ load() + +

+ + +
+ +

Load the pretrained DGL model (GIN or JTVAE), reusing the already loaded model if there is one

+ +
+ Source code in molfeat/trans/pretrained/dgl_pretrained.py +
81
+82
+83
+84
+85
+86
+87
+88
+89
+90
+91
+92
def load(self):
+    """Load GIN model"""
+    if self._model is not None:
+        return self._model
+    download_output_dir = self._artifact_load(
+        name=self.name, download_path=self.cache_path, store=self.store
+    )
+    model_path = dm.fs.join(download_output_dir, self.store.MODEL_PATH_NAME)
+    with fsspec.open(model_path, "rb") as f:
+        model = joblib.load(f)
+    model.eval()
+    return model
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ PretrainedDGLTransformer + + +

+ + +
+

+ Bases: PretrainedMolTransformer

+ + +

DGL Pretrained transformer

+ + + +

Attributes:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescription
featurizer + DGLModel + +
+

DGL featurizer object

+
+
dtype + type + +
+

Data type.

+
+
pooling + str + +
+

Pooling method for GIN's embedding layer (Default: mean)

+
+
batch_size + int + +
+

Batch size to consider for model

+
+
+ +
+ Source code in molfeat/trans/pretrained/dgl_pretrained.py +
 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
class PretrainedDGLTransformer(PretrainedMolTransformer):
+    r"""
+    DGL Pretrained transformer
+
+    Attributes:
+        featurizer (DGLModel): DGL featurizer object
+        dtype (type, optional): Data type.
+        pooling (str, optional): Pooling method for GIN's embedding layer (Default: mean)
+        batch_size (int, optional): Batch size to consider for model
+    """
+
+    def __init__(
+        self,
+        kind: Union[str, DGLModel] = "gin_supervised_contextpred",
+        dtype: Callable = np.float32,
+        pooling: str = "mean",
+        batch_size: int = 32,
+        preload: bool = False,
+        **params,
+    ):
+        """DGL pretrained featurizer
+
+        Args:
+            kind (str, optional): name of the pretrained gin. Defaults to "gin_supervised_contextpred".
+            dtype: datatype. Defaults to np.float32.
+            pooling: global pooling to perform. Defaults to "mean".
+            batch_size: batch size for featurizing the molecules. Defaults to 32.
+            preload: whether to preload the internal pretrained featurizer or not
+
+        """
+        if not requires.check("dgllife"):
+            raise ValueError("Cannot find dgl|dgllife. It's required for this featurizer !")
+        super().__init__(
+            dtype=dtype,
+            pooling=pooling,
+            batch_size=batch_size,
+            preload=preload,
+            kind=kind,
+            **params,
+        )
+        self.pooling = pooling
+        self.preload = preload
+        self._pooling_obj = self.get_pooling(pooling)
+        if isinstance(kind, DGLModel):
+            self.kind = kind.name
+            self.featurizer = kind
+        else:
+            self.kind = kind
+            self.featurizer = DGLModel(name=self.kind)
+        self.batch_size = int(batch_size)
+        if self.preload:
+            self._preload()
+
+    def __repr__(self):
+        return "{}(kind={}, pooling={}, dtype={})".format(
+            self.__class__.__name__,
+            _parse_to_evaluable_str(self.kind),
+            _parse_to_evaluable_str(self.pooling),
+            _parse_to_evaluable_str(self.dtype),
+        )
+
+    def _update_params(self):
+        super()._update_params()
+        self._pooling_obj = self.get_pooling(self.pooling)
+        featurizer = DGLModel(name=self.kind)
+        self.featurizer = featurizer.load()
+
+    @staticmethod
+    def get_pooling(pooling: str):
+        """Get pooling method from name
+
+        Args:
+            pooling: name of the pooling method
+        """
+        pooling = pooling.lower()
+        if pooling in ["mean", "avg", "average"]:
+            return AvgPooling()
+        elif pooling == "sum":
+            return SumPooling()
+        elif pooling == "max":
+            return MaxPooling()
+        else:
+            raise ValueError(f"Pooling: {pooling} not supported !")
+
+    def _embed_gin(self, dataset):
+        """Embed molecules using GIN"""
+        data_loader = DataLoader(
+            dataset,
+            batch_size=self.batch_size,
+            collate_fn=dgl.batch,
+            shuffle=False,
+            drop_last=False,
+        )
+
+        mol_emb = []
+        for batch_id, bg in enumerate(data_loader):
+            if self.verbose:
+                logger.debug("Processing batch {:d}/{:d}".format(batch_id + 1, len(data_loader)))
+            nfeats = [
+                bg.ndata.pop("atomic_number").to(torch.device("cpu")),
+                bg.ndata.pop("chirality_type").to(torch.device("cpu")),
+            ]
+            efeats = [
+                bg.edata.pop("bond_type").to(torch.device("cpu")),
+                bg.edata.pop("bond_direction_type").to(torch.device("cpu")),
+            ]
+            with torch.no_grad():
+                node_repr = self.featurizer(bg, nfeats, efeats)
+            mol_emb.append(self._pooling_obj(bg, node_repr))
+        mol_emb = torch.cat(mol_emb, dim=0).detach().cpu().numpy()
+        return mol_emb
+
+    def _embed_jtvae(self, dataset):
+        """Embed molecules using JTVAE"""
+        dataloader = DataLoader(dataset, batch_size=1, collate_fn=JTVAECollator(training=False))
+
+        mol_emb = []
+        for tree, tree_graph, mol_graph in dataloader:
+            _, tree_vec, mol_vec = self.featurizer.encode(tree_graph, mol_graph)
+            enc = torch.cat([tree_vec, mol_vec], dim=1).detach()
+            mol_emb.append(enc)
+        mol_emb = torch.cat(mol_emb, dim=0).cpu().numpy()
+        return mol_emb
+
+    def _embed(self, smiles: List[str], **kwargs):
+        """Embed molecules into a latent space"""
+        self._preload()
+        dataset, successes = self.graph_featurizer(smiles, kind=self.kind)
+        if self.kind in DGLModel.available_models(query="^jtvae"):
+            mol_emb = self._embed_jtvae(dataset)
+        else:
+            mol_emb = self._embed_gin(dataset)
+
+        mol_emb = list(mol_emb)
+        out = []
+        k = 0
+        for success in successes:
+            if success:
+                out.append(mol_emb[k])
+                k += 1
+            else:
+                out.append(None)
+        return out
+
+    @staticmethod
+    def graph_featurizer(smiles: List[str], kind: Optional[str] = None):
+        """
+        Construct graphs from SMILES and featurize them
+
+        Args:
+            smiles: SMILES of molecules for embedding computation
+
+        Returns:
+            dataset: List of graphs constructed and featurized
+            list of bool: Indicators for whether the SMILES string can be parsed by RDKit
+        """
+        if kind in DGLModel.available_models(query="^jtvae"):
+            vocab = JTVAEVocab()
+
+            tmp_file = tempfile.NamedTemporaryFile(delete=False)
+            with fsspec.open(tmp_file.name, "w") as f:
+                f.write("\n".join(smiles))
+            dataset = JTVAEDataset(tmp_file.name, vocab, training=False)
+            os.unlink(tmp_file.name)
+            # JTVAE does not support failure
+            success = [True] * len(smiles)
+            if len(dataset) != len(smiles):
+                raise ValueError("JTVAE failed to featurize some molecules !")
+            return dataset, success
+
+        else:
+            graphs = []
+            success = []
+            for smi in smiles:
+                try:
+                    mol = dm.to_mol(smi)
+                    if mol is None:
+                        success.append(False)
+                        continue
+                    g = mol_to_bigraph(
+                        mol,
+                        add_self_loop=True,
+                        node_featurizer=PretrainAtomFeaturizer(),
+                        edge_featurizer=PretrainBondFeaturizer(),
+                        canonical_atom_order=False,
+                    )
+                    graphs.append(g)
+                    success.append(True)
+                except Exception as e:
+                    logger.error(e)
+                    success.append(False)
+            return graphs, success
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __init__(kind='gin_supervised_contextpred', dtype=np.float32, pooling='mean', batch_size=32, preload=False, **params) + +

+ + +
+ +

DGL pretrained featurizer

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
kind + str + +
+

name of the pretrained DGL model (GIN or JTVAE). Defaults to "gin_supervised_contextpred".

+
+
+ 'gin_supervised_contextpred' +
dtype + Callable + +
+

datatype. Defaults to np.float32.

+
+
+ float32 +
pooling + str + +
+

global pooling to perform. Defaults to "mean".

+
+
+ 'mean' +
batch_size + int + +
+

batch size for featurizing the molecules. Defaults to 32.

+
+
+ 32 +
preload + bool + +
+

whether to preload the internal pretrained featurizer or not

+
+
+ False +
+ +
+ Source code in molfeat/trans/pretrained/dgl_pretrained.py +
106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
def __init__(
+    self,
+    kind: Union[str, DGLModel] = "gin_supervised_contextpred",
+    dtype: Callable = np.float32,
+    pooling: str = "mean",
+    batch_size: int = 32,
+    preload: bool = False,
+    **params,
+):
+    """DGL pretrained featurizer
+
+    Args:
+        kind (str, optional): name of the pretrained gin. Defaults to "gin_supervised_contextpred".
+        dtype: datatype. Defaults to np.float32.
+        pooling: global pooling to perform. Defaults to "mean".
+        batch_size: batch size for featurizing the molecules. Defaults to 32.
+        preload: whether to preload the internal pretrained featurizer or not
+
+    """
+    if not requires.check("dgllife"):
+        raise ValueError("Cannot find dgl|dgllife. It's required for this featurizer !")
+    super().__init__(
+        dtype=dtype,
+        pooling=pooling,
+        batch_size=batch_size,
+        preload=preload,
+        kind=kind,
+        **params,
+    )
+    self.pooling = pooling
+    self.preload = preload
+    self._pooling_obj = self.get_pooling(pooling)
+    if isinstance(kind, DGLModel):
+        self.kind = kind.name
+        self.featurizer = kind
+    else:
+        self.kind = kind
+        self.featurizer = DGLModel(name=self.kind)
+    self.batch_size = int(batch_size)
+    if self.preload:
+        self._preload()
+
+
+
+ +
+ + +
+ + + + +

+ get_pooling(pooling) + + + staticmethod + + +

+ + +
+ +

Get pooling method from name

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
pooling + str + +
+

name of the pooling method

+
+
+ required +
+ +
+ Source code in molfeat/trans/pretrained/dgl_pretrained.py +
162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
@staticmethod
+def get_pooling(pooling: str):
+    """Get pooling method from name
+
+    Args:
+        pooling: name of the pooling method
+    """
+    pooling = pooling.lower()
+    if pooling in ["mean", "avg", "average"]:
+        return AvgPooling()
+    elif pooling == "sum":
+        return SumPooling()
+    elif pooling == "max":
+        return MaxPooling()
+    else:
+        raise ValueError(f"Pooling: {pooling} not supported !")
+
+
+
+ +
+ + +
+ + + + +

+ graph_featurizer(smiles, kind=None) + + + staticmethod + + +

+ + +
+ +

Construct graphs from SMILES and featurize them

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
smiles + List[str] + +
+

SMILES of molecules for embedding computation

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + + + + + +
Name TypeDescription
dataset + +
+

List of graphs constructed and featurized

+
+
+ +
+

list of bool: Indicators for whether the SMILES string can be parsed by RDKit

+
+
+ +
+ Source code in molfeat/trans/pretrained/dgl_pretrained.py +
239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
@staticmethod
+def graph_featurizer(smiles: List[str], kind: Optional[str] = None):
+    """
+    Construct graphs from SMILES and featurize them
+
+    Args:
+        smiles: SMILES of molecules for embedding computation
+
+    Returns:
+        dataset: List of graphs constructed and featurized
+        list of bool: Indicators for whether the SMILES string can be parsed by RDKit
+    """
+    if kind in DGLModel.available_models(query="^jtvae"):
+        vocab = JTVAEVocab()
+
+        tmp_file = tempfile.NamedTemporaryFile(delete=False)
+        with fsspec.open(tmp_file.name, "w") as f:
+            f.write("\n".join(smiles))
+        dataset = JTVAEDataset(tmp_file.name, vocab, training=False)
+        os.unlink(tmp_file.name)
+        # JTVAE does not support failure
+        success = [True] * len(smiles)
+        if len(dataset) != len(smiles):
+            raise ValueError("JTVAE failed to featurize some molecules !")
+        return dataset, success
+
+    else:
+        graphs = []
+        success = []
+        for smi in smiles:
+            try:
+                mol = dm.to_mol(smi)
+                if mol is None:
+                    success.append(False)
+                    continue
+                g = mol_to_bigraph(
+                    mol,
+                    add_self_loop=True,
+                    node_featurizer=PretrainAtomFeaturizer(),
+                    edge_featurizer=PretrainBondFeaturizer(),
+                    canonical_atom_order=False,
+                )
+                graphs.append(g)
+                success.append(True)
+            except Exception as e:
+                logger.error(e)
+                success.append(False)
+        return graphs, success
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.trans.pretrained.fcd.html b/0.9.7/api/molfeat.trans.pretrained.fcd.html new file mode 100644 index 0000000..6b7e10d --- /dev/null +++ b/0.9.7/api/molfeat.trans.pretrained.fcd.html @@ -0,0 +1,1798 @@ + + + + + + + + + + + + + + + + + + + + + + + + + FCD - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

FCD

+ +

FCD model

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ FCDTransformer + + +

+ + +
+

+ Bases: PretrainedMolTransformer

+ + +

FCD transformer based on the ChemNet pretrained model

+ + + +

Attributes:

+ + + + + + + + + + + + + + + + + + + + +
NameTypeDescription
featurizer + FCD + +
+

FCD featurizer object

+
+
dtype + type + +
+

Data type. Use __call__ to get the output in this dtype

+
+
+ +
+ Source code in molfeat/trans/pretrained/fcd.py +
10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
class FCDTransformer(PretrainedMolTransformer):
+    r"""
+    FCD transformer based on the ChemNet pretrained model
+
+    Attributes:
+        featurizer (FCD): FCD featurizer object
+        dtype (type, optional): Data type. Use call instead
+    """
+
+    def __init__(self, n_jobs=1, dtype=np.float32, **params):
+        super().__init__(dtype=dtype, **params)
+        if not requires.check("fcd_torch"):
+            raise ImportError(
+                "`fcd_torch` is not available, please install it `conda install -c conda-forge fcd_torch'`"
+            )
+
+        self.n_jobs = n_jobs
+        self.featurizer = FCD(n_jobs=n_jobs)
+
+    def _embed(self, smiles, **kwargs):
+        """Compute embedding"""
+        return self.featurizer.get_predictions(smiles)
+
+    def _update_params(self):
+        super()._update_params()
+        self.featurizer = FCD(n_jobs=self.n_jobs)
+
+
+ + + +
+ + + + + + + + + + + +
+ +
+ +
+ + + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.trans.pretrained.graphormer.html b/0.9.7/api/molfeat.trans.pretrained.graphormer.html new file mode 100644 index 0000000..f35b312 --- /dev/null +++ b/0.9.7/api/molfeat.trans.pretrained.graphormer.html @@ -0,0 +1,2665 @@ + + + + + + + + + + + + + + + + + + + + + + + + + Graphormer - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + + + + + +
+
+ + + + + + + +

Graphormer

+ +

Graphormer pretrained models

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ GraphormerTransformer + + +

+ + +
+

+ Bases: PretrainedMolTransformer

+ + +

Graphormer transformer from Microsoft, pretrained on the PCQM4Mv2 quantum chemistry dataset +for the prediction of the HOMO-LUMO gap.

+ + + +

Attributes:

+ + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescription
featurizer + +
+

Graphormer embedding object

+
+
dtype + +
+

Data type. Use __call__ to get the output in this dtype

+
+
pooling + +
+

Pooling method for Graphormer's embedding layer

+
+
+ +
+ Source code in molfeat/trans/pretrained/graphormer.py +
 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
class GraphormerTransformer(PretrainedMolTransformer):
+    r"""
+    Graphormer transformer from microsoft, pretrained on PCQM4Mv2 quantum chemistry dataset
+    for the prediction of homo-lumo gap.
+
+    Attributes:
+        featurizer: Graphormer embedding object
+        dtype: Data type. Use call instead
+        pooling: Pooling method for Graphormer's embedding layer
+    """
+
+    def __init__(
+        self,
+        kind: str = "pcqm4mv2_graphormer_base",
+        dtype: Callable = np.float32,
+        pooling: str = "mean",
+        max_length: Optional[int] = None,
+        concat_layers: Union[List[int], int] = -1,
+        ignore_padding: bool = True,
+        version=None,
+        **params,
+    ):
+        """
+        Pretrained graphormer featurizer.
+
+        !!! note
+            The default behaviour of this feature extractor is to return the last hidden state of the encoder,
+            averaged across all nodes (including the virtual node connected to all other nodes).
+
+            For a different behaviour, please change the pooling method:
+            * `graph` or `virtual`: use the virtual node embedding in the last layer to get the graph representation
+            * `mean`, `max`, `sum`, etc or any other supported pooling of `molfeat.utils.pooler.Pooling`
+                will take the operation defined by the pooling layer across all nodes of each graph
+
+        Args:
+            kind: name of the featurizer as available in the model store
+            dtype: Data type to output
+            pooling: type of pooling to use. One of ['graph', 'virtual', 'mean', 'max', 'sum']. The value "graph" corresponds to the virtual node representation
+            max_length: Maximum length of the input sequence to consider. Please update this for large sequences
+            concat_layers: Layer to concat to get the representation. By default the last hidden layer is returned.
+            ignore_padding: Whether to ignore padding in the representation (default: True) to avoid effect of batching
+            params: any other parameter to pass to PretrainedMolTransformer
+        """
+
+        super().__init__(dtype=dtype, pooling=pooling, **params)
+        if not requires.check("graphormer_pretrained"):
+            raise ValueError("`graphormer` is required to use this featurizer.")
+
+        if concat_layers is None:
+            concat_layers = -1
+        if not isinstance(concat_layers, list):
+            concat_layers = [concat_layers]
+        self.concat_layers = concat_layers
+        self.preload = True
+        self.name = kind
+        self._require_mols = False
+        self.max_length = max_length
+        self.ignore_padding = ignore_padding
+        if isinstance(pooling, str):
+            if pooling in Pooling.SUPPORTED_POOLING:
+                pooling = Pooling(dim=1, name=pooling)
+            else:
+                pooling = None
+        self.pooling = pooling
+        self.featurizer = GraphormerEmbeddingsExtractor(
+            pretrained_name=self.name, max_nodes=self.max_length, concat_layers=self.concat_layers
+        )
+        self.featurizer.config.max_nodes = self.max_length
+        self.version = version
+
+    def __repr__(self):
+        return "{}(name={}, pooling={}, dtype={})".format(
+            self.__class__.__name__,
+            _parse_to_evaluable_str(self.name),
+            _parse_to_evaluable_str(self.pooling.name),
+            _parse_to_evaluable_str(self.dtype),
+        )
+
+    @staticmethod
+    def list_available_models():
+        """List available graphormer model to use"""
+        return [
+            "pcqm4mv1_graphormer_base",  # PCQM4Mv1
+            "pcqm4mv2_graphormer_base",  # PCQM4Mv2
+            "pcqm4mv1_graphormer_base_for_molhiv",  # ogbg-molhiv
+            "oc20is2re_graphormer3d_base",  # Open Catalyst Challenge
+        ]
+
+    def _embed(self, inputs: List[str], **kwargs):
+        """Internal molecular embedding
+
+        Args:
+            smiles: input smiles
+        """
+        with torch.no_grad():
+            layer_reprs, graph_reprs, padding_mask = self.featurizer(inputs)
+            if self.pooling is None:
+                x = graph_reprs
+            else:
+                x = self.pooling(layer_reprs, mask=(padding_mask if self.ignore_padding else None))
+        return x.numpy()
+
+    def __getstate__(self):
+        """Getting state to allow pickling"""
+        d = copy.deepcopy(self.__dict__)
+        d["precompute_cache"] = None
+        d.pop("featurizer", None)
+        return d
+
+    def __setstate__(self, d):
+        """Setting state during reloading pickling"""
+        self.__dict__.update(d)
+        self._update_params()
+
+    def compute_max_length(self, inputs: list):
+        """Compute maximum node number for the input list of molecules
+
+        Args:
+            inputs: input list of molecules
+        """
+        dataset = GraphormerInferenceDataset(
+            inputs,
+            multi_hop_max_dist=self.featurizer.config.multi_hop_max_dist,
+            spatial_pos_max=self.featurizer.config.spatial_pos_max,
+        )
+        xs = [item.x.size(0) for item in dataset]
+        return max(xs)
+
+    def set_max_length(self, max_length: int):
+        """Set the maximum length for this featurizer"""
+        self.max_length = max_length
+        self._update_params()
+        self._preload()
+
+    def _convert(self, inputs: list, **kwargs):
+        """Convert molecules to the right format
+
+        Args:
+            inputs: inputs to preprocess
+
+        Returns:
+            processed: pre-processed input list
+        """
+        inputs = super()._convert(inputs, **kwargs)
+        batch = self.featurizer._convert(inputs)
+        return batch
+
+    def _update_params(self):
+        super()._update_params()
+        self.featurizer = GraphormerEmbeddingsExtractor(
+            pretrained_name=self.name, max_nodes=self.max_length
+        )
+        self.featurizer.config.max_nodes = self.max_length
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __getstate__() + +

+ + +
+ +

Getting state to allow pickling

+ +
+ Source code in molfeat/trans/pretrained/graphormer.py +
121
+122
+123
+124
+125
+126
def __getstate__(self):
+    """Getting state to allow pickling"""
+    d = copy.deepcopy(self.__dict__)
+    d["precompute_cache"] = None
+    d.pop("featurizer", None)
+    return d
+
+
+
+ +
+ + +
+ + + + +

+ __init__(kind='pcqm4mv2_graphormer_base', dtype=np.float32, pooling='mean', max_length=None, concat_layers=-1, ignore_padding=True, version=None, **params) + +

+ + +
+ +

Pretrained graphormer featurizer.

+
+

Note

+

The default behaviour of this feature extractor is to return the last hidden state of the encoder, +averaged across all nodes (including the virtual node connected to all other nodes).

+

For a different behaviour, please change the pooling method: +* graph or virtual: use the virtual node embedding in the last layer to get the graph representation +* mean, max, sum, etc or any other supported pooling of molfeat.utils.pooler.Pooling + will take the operation defined by the pooling layer across all nodes of each graph

+
+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
kind + str + +
+

name of the featurizer as available in the model store

+
+
+ 'pcqm4mv2_graphormer_base' +
dtype + Callable + +
+

Data type to output

+
+
+ float32 +
pooling + str + +
+

type of pooling to use. One of ['graph', 'virtual', 'mean', 'max', 'sum']. The value "graph" corresponds to the virtual node representation

+
+
+ 'mean' +
max_length + Optional[int] + +
+

Maximum length of the input sequence to consider. Please update this for large sequences

+
+
+ None +
concat_layers + Union[List[int], int] + +
+

Layer to concat to get the representation. By default the last hidden layer is returned.

+
+
+ -1 +
ignore_padding + bool + +
+

Whether to ignore padding in the representation (default: True) to avoid effect of batching

+
+
+ True +
params + +
+

any other parameter to pass to PretrainedMolTransformer

+
+
+ {} +
+ +
+ Source code in molfeat/trans/pretrained/graphormer.py +
30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
def __init__(
+    self,
+    kind: str = "pcqm4mv2_graphormer_base",
+    dtype: Callable = np.float32,
+    pooling: str = "mean",
+    max_length: Optional[int] = None,
+    concat_layers: Union[List[int], int] = -1,
+    ignore_padding: bool = True,
+    version=None,
+    **params,
+):
+    """
+    Pretrained graphormer featurizer.
+
+    !!! note
+        The default behaviour of this feature extractor is to return the last hidden state of the encoder,
+        averaged across all nodes (including the virtual node connected to all other nodes).
+
+        For a different behaviour, please change the pooling method:
+        * `graph` or `virtual`: use the virtual node embedding in the last layer to get the graph representation
+        * `mean`, `max`, `sum`, etc or any other supported pooling of `molfeat.utils.pooler.Pooling`
+            will take the operation defined by the pooling layer across all nodes of each graph
+
+    Args:
+        kind: name of the featurizer as available in the model store
+        dtype: Data type to output
+        pooling: type of pooling to use. One of ['graph', 'virtual', 'mean', 'max', 'sum']. The value "graph" corresponds to the virtual node representation
+        max_length: Maximum length of the input sequence to consider. Please update this for large sequences
+        concat_layers: Layer to concat to get the representation. By default the last hidden layer is returned.
+        ignore_padding: Whether to ignore padding in the representation (default: True) to avoid effect of batching
+        params: any other parameter to pass to PretrainedMolTransformer
+    """
+
+    super().__init__(dtype=dtype, pooling=pooling, **params)
+    if not requires.check("graphormer_pretrained"):
+        raise ValueError("`graphormer` is required to use this featurizer.")
+
+    if concat_layers is None:
+        concat_layers = -1
+    if not isinstance(concat_layers, list):
+        concat_layers = [concat_layers]
+    self.concat_layers = concat_layers
+    self.preload = True
+    self.name = kind
+    self._require_mols = False
+    self.max_length = max_length
+    self.ignore_padding = ignore_padding
+    if isinstance(pooling, str):
+        if pooling in Pooling.SUPPORTED_POOLING:
+            pooling = Pooling(dim=1, name=pooling)
+        else:
+            pooling = None
+    self.pooling = pooling
+    self.featurizer = GraphormerEmbeddingsExtractor(
+        pretrained_name=self.name, max_nodes=self.max_length, concat_layers=self.concat_layers
+    )
+    self.featurizer.config.max_nodes = self.max_length
+    self.version = version
+
+
+
+ +
+ + +
+ + + + +

+ __setstate__(d) + +

+ + +
+ +

Setting state during reloading pickling

+ +
+ Source code in molfeat/trans/pretrained/graphormer.py +
128
+129
+130
+131
def __setstate__(self, d):
+    """Setting state during reloading pickling"""
+    self.__dict__.update(d)
+    self._update_params()
+
+
+
+ +
+ + +
+ + + + +

+ compute_max_length(inputs) + +

+ + +
+ +

Compute maximum node number for the input list of molecules

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
inputs + list + +
+

input list of molecules

+
+
+ required +
+ +
+ Source code in molfeat/trans/pretrained/graphormer.py +
133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
def compute_max_length(self, inputs: list):
+    """Compute maximum node number for the input list of molecules
+
+    Args:
+        inputs: input list of molecules
+    """
+    dataset = GraphormerInferenceDataset(
+        inputs,
+        multi_hop_max_dist=self.featurizer.config.multi_hop_max_dist,
+        spatial_pos_max=self.featurizer.config.spatial_pos_max,
+    )
+    xs = [item.x.size(0) for item in dataset]
+    return max(xs)
+
+
+
+ +
+ + +
+ + + + +

+ list_available_models() + + + staticmethod + + +

+ + +
+ +

List the available Graphormer models to use

+ +
+ Source code in molfeat/trans/pretrained/graphormer.py +
 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
@staticmethod
+def list_available_models():
+    """List available graphormer model to use"""
+    return [
+        "pcqm4mv1_graphormer_base",  # PCQM4Mv1
+        "pcqm4mv2_graphormer_base",  # PCQM4Mv2
+        "pcqm4mv1_graphormer_base_for_molhiv",  # ogbg-molhiv
+        "oc20is2re_graphormer3d_base",  # Open Catalyst Challenge
+    ]
+
+
+
+ +
+ + +
+ + + + +

+ set_max_length(max_length) + +

+ + +
+ +

Set the maximum length for this featurizer

+ +
+ Source code in molfeat/trans/pretrained/graphormer.py +
147
+148
+149
+150
+151
def set_max_length(self, max_length: int):
+    """Set the maximum length for this featurizer"""
+    self.max_length = max_length
+    self._update_params()
+    self._preload()
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.trans.pretrained.hf_transformers.html b/0.9.7/api/molfeat.trans.pretrained.hf_transformers.html new file mode 100644 index 0000000..3650e30 --- /dev/null +++ b/0.9.7/api/molfeat.trans.pretrained.hf_transformers.html @@ -0,0 +1,3944 @@ + + + + + + + + + + + + + + + + + + + + + + + + + HuggingFace - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+ +
+ + + +
+
+ + + + + + + +

HuggingFace

+ +

Transformer pretrained models

+

HuggingFace Transformers

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ HFExperiment + + + + dataclass + + +

+ + +
+ + +
+ Source code in molfeat/trans/pretrained/hf_transformers.py +
34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
+88
+89
+90
+91
@dataclass
+class HFExperiment:
+    model: PreTrainedModel
+    tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast]
+    notation: str = "smiles"
+
+    @classmethod
+    def save(cls, model: HFExperiment, path: str, clean_up: bool = False):
+        """Save a hugging face model to a specific path
+
+        Args:
+            model: model to save
+            path: path to the folder root where to save the model
+            clean_up: whether to clean up the local path after saving
+        """
+        local_path = tempfile.mkdtemp()
+        # we can save both the tokenizer and the model to the same path
+        model.model.save_pretrained(local_path)
+        model.tokenizer.save_pretrained(local_path)
+
+        # With transformers>=4.35.0, models are by default saved as safetensors.
+        # For backwards compatibility, we also save the model as the older pickle-based format.
+        model.model.save_pretrained(local_path, safe_serialization=False)
+
+        dm.fs.copy_dir(local_path, path, force=True, progress=True, leave_progress=False)
+        logger.info(f"Model saved to {path}")
+        # clean up now
+        if clean_up:
+            mapper = dm.fs.get_mapper(local_path)
+            mapper.fs.delete(local_path, recursive=True)
+        return path
+
+    @classmethod
+    def load(cls, path: str, model_class=None, device: str = "cpu"):
+        """Load a model from the given path
+        Args:
+            path: Path to the model to load
+            model_class: optional model class to provide if the model should be loaded with a specific class
+            device: the device to load the model on ("cpu" or "cuda")
+        """
+        if not dm.fs.is_local_path(path):
+            local_path = tempfile.mkdtemp()
+            dm.fs.copy_dir(path, local_path, force=True, progress=True, leave_progress=False)
+        else:
+            local_path = path
+
+        if model_class is None:
+            model_config = AutoConfig.from_pretrained(local_path)
+            architectures = getattr(model_config, "architectures", [])
+            if len(architectures) > 0:
+                model_class = MODEL_MAPPING._load_attr_from_module(
+                    model_config.model_type, architectures[0]
+                )
+            else:
+                model_class = AutoModel
+        model = model_class.from_pretrained(local_path).to(device)
+        tokenizer = AutoTokenizer.from_pretrained(local_path)
+        return cls(model, tokenizer)
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ load(path, model_class=None, device='cpu') + + + classmethod + + +

+ + +
+ +

Load a model from the given path +Args: + path: Path to the model to load + model_class: optional model class to provide if the model should be loaded with a specific class + device: the device to load the model on ("cpu" or "cuda")

+ +
+ Source code in molfeat/trans/pretrained/hf_transformers.py +
66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
+88
+89
+90
+91
@classmethod
+def load(cls, path: str, model_class=None, device: str = "cpu"):
+    """Load a model from the given path
+    Args:
+        path: Path to the model to load
+        model_class: optional model class to provide if the model should be loaded with a specific class
+        device: the device to load the model on ("cpu" or "cuda")
+    """
+    if not dm.fs.is_local_path(path):
+        local_path = tempfile.mkdtemp()
+        dm.fs.copy_dir(path, local_path, force=True, progress=True, leave_progress=False)
+    else:
+        local_path = path
+
+    if model_class is None:
+        model_config = AutoConfig.from_pretrained(local_path)
+        architectures = getattr(model_config, "architectures", [])
+        if len(architectures) > 0:
+            model_class = MODEL_MAPPING._load_attr_from_module(
+                model_config.model_type, architectures[0]
+            )
+        else:
+            model_class = AutoModel
+    model = model_class.from_pretrained(local_path).to(device)
+    tokenizer = AutoTokenizer.from_pretrained(local_path)
+    return cls(model, tokenizer)
+
+
+
+ +
+ + +
+ + + + +

+ save(model, path, clean_up=False) + + + classmethod + + +

+ + +
+ +

Save a hugging face model to a specific path

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
model + HFExperiment + +
+

model to save

+
+
+ required +
path + str + +
+

path to the folder root where to save the model

+
+
+ required +
clean_up + bool + +
+

whether to clean up the local path after saving

+
+
+ False +
+ +
+ Source code in molfeat/trans/pretrained/hf_transformers.py +
40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
@classmethod
+def save(cls, model: HFExperiment, path: str, clean_up: bool = False):
+    """Save a hugging face model to a specific path
+
+    Args:
+        model: model to save
+        path: path to the folder root where to save the model
+        clean_up: whether to clean up the local path after saving
+    """
+    local_path = tempfile.mkdtemp()
+    # we can save both the tokenizer and the model to the same path
+    model.model.save_pretrained(local_path)
+    model.tokenizer.save_pretrained(local_path)
+
+    # With transformers>=4.35.0, models are by default saved as safetensors.
+    # For backwards compatibility, we also save the model as the older pickle-based format.
+    model.model.save_pretrained(local_path, safe_serialization=False)
+
+    dm.fs.copy_dir(local_path, path, force=True, progress=True, leave_progress=False)
+    logger.info(f"Model saved to {path}")
+    # clean up now
+    if clean_up:
+        mapper = dm.fs.get_mapper(local_path)
+        mapper.fs.delete(local_path, recursive=True)
+    return path
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ HFModel + + +

+ + +
+

+ Bases: PretrainedStoreModel

+ + +

Transformer model loading

+ +
+ Source code in molfeat/trans/pretrained/hf_transformers.py +
 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
class HFModel(PretrainedStoreModel):
+    """Transformer model loading model loading"""
+
+    def __init__(
+        self,
+        name: str,
+        cache_path: Optional[os.PathLike] = None,
+        store: Optional[ModelStore] = None,
+    ):
+        """Model loader initializer
+
+        Args:
+            name (str, optional): Name of the model for ada.
+            cache_path (os.PathLike, optional): Local cache path for faster loading. This is the cache_path parameter for ADA loading !
+        """
+
+        super().__init__(name, cache_path=cache_path, store=store)
+        self._model = None
+
+    @classmethod
+    def _ensure_local(cls, object_path: Union[str, os.PathLike]):
+        """Make sure the input path is a local path otherwise download it
+
+        Args:
+            object_path: Path to the object
+
+        """
+        if dm.fs.is_local_path(object_path):
+            return object_path
+        local_path = tempfile.mkdtemp()
+        if dm.fs.is_file(object_path):
+            local_path = os.path.join(local_path, os.path.basename(object_path))
+            dm.fs.copy_file(object_path, local_path)
+        else:
+            dm.fs.copy_dir(object_path, local_path)
+        return local_path
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        model: Union[str, PreTrainedModel],
+        tokenizer: Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast],
+        model_class=None,
+        model_name: Optional[str] = None,
+    ):
+        """Load model using huggingface pretrained model loader hook
+
+        Args:
+            model: Model to load. Can also be the name on the hub or the path to the model
+            tokenizer: Tokenizer to load. Can also be the name on the hub or the path to the tokenizer
+            model_class: optional model class to provide if the model should be loaded with a specific class
+            model_name: optional model name to give to this model.
+        """
+
+        # load the model
+        if isinstance(model, PreTrainedModel):
+            model_obj = model
+        else:
+            if dm.fs.exists(model):
+                model = cls._ensure_local(model)
+            if model_class is None:
+                model_config = AutoConfig.from_pretrained(model)
+                architectures = getattr(model_config, "architectures", [])
+                if len(architectures) > 0:
+                    model_class = MODEL_MAPPING._load_attr_from_module(
+                        model_config.model_type, architectures[0]
+                    )
+                else:
+                    model_class = AutoModel
+            model_obj = model_class.from_pretrained(model)
+
+        if isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
+            tokenizer_obj = tokenizer
+        else:
+            if dm.fs.exists(tokenizer):
+                tokenizer = cls._ensure_local(tokenizer)
+            tokenizer_obj = AutoTokenizer.from_pretrained(tokenizer)
+        name = model_name or f"hf_model_{uuid.uuid4().hex[:8]}"
+        model = HFModel(name=name, store=ModelStore())
+        model._model = HFExperiment(model=model_obj, tokenizer=tokenizer_obj)
+        return model
+
+    @classmethod
+    def register_pretrained(
+        cls,
+        model: Union[str, PreTrainedModel],
+        tokenizer: Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast],
+        model_card: ModelInfo,
+        model_class=None,
+    ):
+        """Register a pretrained huggingface model to the model store
+        Args:
+            model: Model to load. Can also be the name on the hub or the path to the model
+            tokenizer: Tokenizer to load. Can also be the name on the hub or the path to the tokenizer
+            model_class: optional model class to provide if the model should be loaded with a specific class
+            model_card: optional model card to provide for registering this model
+        """
+        model = cls.from_pretrained(model, tokenizer, model_class, model_name=model_card.name)
+        model.store.register(model_card, model._model, save_fn=HFExperiment.save)
+        return model
+
+    def get_notation(self, default_notation: Optional[str] = None):
+        """Get the notation of the model"""
+        notation = default_notation
+        try:
+            modelcard = self.store.search(name=self.name)[0]
+            notation = modelcard.inputs
+        except Exception:
+            pass
+        return notation
+
+    def load(self):
+        """Load Transformer Pretrained featurizer model"""
+        if self._model is not None:
+            return self._model
+        download_output_dir = self._artifact_load(
+            name=self.name, download_path=self.cache_path, store=self.store
+        )
+        model_path = dm.fs.join(download_output_dir, self.store.MODEL_PATH_NAME)
+        self._model = HFExperiment.load(model_path)
+        return self._model
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __init__(name, cache_path=None, store=None) + +

+ + +
+ +

Model loader initializer

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
name + str + +
+

Name of the model for ada.

+
+
+ required +
cache_path + PathLike + +
+

Local cache path for faster loading. This is the cache_path parameter for ADA loading!

+
+
+ None +
+ +
+ Source code in molfeat/trans/pretrained/hf_transformers.py +
 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
def __init__(
+    self,
+    name: str,
+    cache_path: Optional[os.PathLike] = None,
+    store: Optional[ModelStore] = None,
+):
+    """Model loader initializer
+
+    Args:
+        name (str, optional): Name of the model for ada.
+        cache_path (os.PathLike, optional): Local cache path for faster loading. This is the cache_path parameter for ADA loading !
+    """
+
+    super().__init__(name, cache_path=cache_path, store=store)
+    self._model = None
+
+
+
+ +
+ + +
+ + + + +

+ from_pretrained(model, tokenizer, model_class=None, model_name=None) + + + classmethod + + +

+ + +
+ +

Load model using huggingface pretrained model loader hook

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
model + Union[str, PreTrainedModel] + +
+

Model to load. Can also be the name on the hub or the path to the model

+
+
+ required +
tokenizer + Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast] + +
+

Tokenizer to load. Can also be the name on the hub or the path to the tokenizer

+
+
+ required +
model_class + +
+

optional model class to provide if the model should be loaded with a specific class

+
+
+ None +
model_name + Optional[str] + +
+

optional model name to give to this model.

+
+
+ None +
+ +
+ Source code in molfeat/trans/pretrained/hf_transformers.py +
131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
@classmethod
+def from_pretrained(
+    cls,
+    model: Union[str, PreTrainedModel],
+    tokenizer: Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast],
+    model_class=None,
+    model_name: Optional[str] = None,
+):
+    """Load model using huggingface pretrained model loader hook
+
+    Args:
+        model: Model to load. Can also be the name on the hub or the path to the model
+        tokenizer: Tokenizer to load. Can also be the name on the hub or the path to the tokenizer
+        model_class: optional model class to provide if the model should be loaded with a specific class
+        model_name: optional model name to give to this model.
+    """
+
+    # load the model
+    if isinstance(model, PreTrainedModel):
+        model_obj = model
+    else:
+        if dm.fs.exists(model):
+            model = cls._ensure_local(model)
+        if model_class is None:
+            model_config = AutoConfig.from_pretrained(model)
+            architectures = getattr(model_config, "architectures", [])
+            if len(architectures) > 0:
+                model_class = MODEL_MAPPING._load_attr_from_module(
+                    model_config.model_type, architectures[0]
+                )
+            else:
+                model_class = AutoModel
+        model_obj = model_class.from_pretrained(model)
+
+    if isinstance(tokenizer, (PreTrainedTokenizer, PreTrainedTokenizerFast)):
+        tokenizer_obj = tokenizer
+    else:
+        if dm.fs.exists(tokenizer):
+            tokenizer = cls._ensure_local(tokenizer)
+        tokenizer_obj = AutoTokenizer.from_pretrained(tokenizer)
+    name = model_name or f"hf_model_{uuid.uuid4().hex[:8]}"
+    model = HFModel(name=name, store=ModelStore())
+    model._model = HFExperiment(model=model_obj, tokenizer=tokenizer_obj)
+    return model
+
+
+
+ +
+ + +
+ + + + +

+ get_notation(default_notation=None) + +

+ + +
+ +

Get the notation of the model

+ +
+ Source code in molfeat/trans/pretrained/hf_transformers.py +
195
+196
+197
+198
+199
+200
+201
+202
+203
def get_notation(self, default_notation: Optional[str] = None):
+    """Get the notation of the model"""
+    notation = default_notation
+    try:
+        modelcard = self.store.search(name=self.name)[0]
+        notation = modelcard.inputs
+    except Exception:
+        pass
+    return notation
+
+
+
+ +
+ + +
+ + + + +

+ load() + +

+ + +
+ +

Load Transformer Pretrained featurizer model

+ +
+ Source code in molfeat/trans/pretrained/hf_transformers.py +
205
+206
+207
+208
+209
+210
+211
+212
+213
+214
def load(self):
+    """Load Transformer Pretrained featurizer model"""
+    if self._model is not None:
+        return self._model
+    download_output_dir = self._artifact_load(
+        name=self.name, download_path=self.cache_path, store=self.store
+    )
+    model_path = dm.fs.join(download_output_dir, self.store.MODEL_PATH_NAME)
+    self._model = HFExperiment.load(model_path)
+    return self._model
+
+
+
+ +
+ + +
+ + + + +

+ register_pretrained(model, tokenizer, model_card, model_class=None) + + + classmethod + + +

+ + +
+ +

Register a pretrained huggingface model to the model store +Args: + model: Model to load. Can also be the name on the hub or the path to the model + tokenizer: Tokenizer to load. Can also be the name on the hub or the path to the tokenizer + model_class: optional model class to provide if the model should be loaded with a specific class + model_card: model card to use for registering this model (required)

+ +
+ Source code in molfeat/trans/pretrained/hf_transformers.py +
176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
@classmethod
+def register_pretrained(
+    cls,
+    model: Union[str, PreTrainedModel],
+    tokenizer: Union[str, PreTrainedTokenizer, PreTrainedTokenizerFast],
+    model_card: ModelInfo,
+    model_class=None,
+):
+    """Register a pretrained huggingface model to the model store
+    Args:
+        model: Model to load. Can also be the name on the hub or the path to the model
+        tokenizer: Tokenizer to load. Can also be the name on the hub or the path to the tokenizer
+        model_class: optional model class to provide if the model should be loaded with a specific class
+        model_card: optional model card to provide for registering this model
+    """
+    model = cls.from_pretrained(model, tokenizer, model_class, model_name=model_card.name)
+    model.store.register(model_card, model._model, save_fn=HFExperiment.save)
+    return model
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ PretrainedHFTransformer + + +

+ + +
+

+ Bases: PretrainedMolTransformer

+ + +

HuggingFace Transformer for feature extraction.

+
+

Note

+

For convenience and consistency, this featurizer only accepts as inputs +smiles and molecules, then perform the internal conversion, based on the notation provided.

+
+ +
+ Source code in molfeat/trans/pretrained/hf_transformers.py +
217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
class PretrainedHFTransformer(PretrainedMolTransformer):
+    """
+    HuggingFace Transformer for feature extraction.
+
+    !!! note
+        For convenience and consistency, this featurizer only accepts as inputs
+        smiles and molecules, then perform the internal conversion, based on the notation provided.
+    """
+
+    NEEDS_RANDOM_SEED = ["bert", "roberta"]
+
+    def __init__(
+        self,
+        kind: Union[str, HFModel] = "ChemBERTa-77M-MLM",
+        notation: Optional[str] = "none",
+        pooling: str = "mean",
+        concat_layers: Union[List[int], int] = -1,
+        prefer_encoder: bool = True,
+        dtype=np.float32,
+        device="cpu",
+        max_length: int = 128,
+        ignore_padding: bool = True,
+        preload: bool = False,
+        n_jobs: int = 0,
+        random_seed: Optional[int] = None,
+        **params,
+    ):
+        """
+        HuggingFace Transformer for featurizer extraction
+        The default behaviour of this feature extractor is to return the last hidden state of the encoder
+        similar to what is performed by the pipeline 'feature-extraction' in hugging face.
+
+        !!! warning
+            For bert models, the default pooling layers is a neural network. Therefore, do not use the default
+            Or provide a random seed for reproducibility (in this case pooling will act as random projection to the same manifold)
+
+        !!! note
+            The pooling module of this featurizer is accessible through the `_pooling_obj` attribute.
+
+        Args:
+            kind: name of the featurizer as available in the model store
+            notation: optional line notation to use. Only use if it cannot be found from the model card.
+            pooling: type of pooling to use. One of ['default', 'mean', 'max', 'sum', 'clf', ]. The value "default" corresponds to the default litterature pooling for each model type.
+                See `molfeat.utils.pooler.get_default_hf_pooler` for more details.
+            concat_layers: Layer to concat to get the representation. By default the last hidden layer is returned.
+            prefer_encoder: For an encoder-decoder model, prefer the embeddings provided by the encoder.
+            dtype: Data type to output
+            device: Torch device on which to run the featurizer.
+            max_length: Maximum length of the input sequence to consider. Please update this for large sequences
+            ignore_padding: Whether to ignore padding in the representation (default: True) to avoid effect of batching
+            preload: Whether to preload the model into memory or not
+            n_jobs: number of jobs to use
+            random_seed: random seed to use for reproducibility whenever a DNN pooler is used (e.g bert/roberta)
+        """
+
+        if not requires.check("transformers"):
+            raise ValueError(
+                "Cannot find transformers and/or tokenizers. It's required for this featurizer !"
+            )
+
+        super().__init__(
+            dtype=dtype,
+            device=device,
+            n_jobs=n_jobs,
+            **params,
+        )
+        if concat_layers is None:
+            concat_layers = -1
+        if not isinstance(concat_layers, list):
+            concat_layers = [concat_layers]
+        self.concat_layers = concat_layers
+        self.max_length = max_length
+        self.ignore_padding = ignore_padding
+        self._require_mols = False
+        self.random_seed = random_seed
+        self.preload = preload
+        self.pooling = pooling
+        self.prefer_encoder = prefer_encoder
+        self.device = torch.device(device)
+        self._pooling_obj = None
+        if isinstance(kind, HFModel):
+            self.kind = kind.name
+            self.featurizer = kind
+        else:
+            self.kind = kind
+            self.featurizer = HFModel(name=self.kind)
+        self.notation = self.featurizer.get_notation(notation) or "none"
+        self.converter = SmilesConverter(self.notation)
+        if self.preload:
+            self._preload()
+
+    def _update_params(self):
+        """Update the parameters of this model"""
+        # pylint: disable=no-member
+        super()._update_params()
+
+        hf_model = HFModel(
+            name=self.kind,
+        )
+        self.featurizer = hf_model.load()
+        config = self.featurizer.model.config.to_dict()
+        self._pooling_obj = self._pooling_obj = (
+            get_default_hgf_pooler(self.pooling, config, random_seed=self.random_seed)
+            if self._pooling_obj is None
+            else self._pooling_obj
+        )
+
+    def _preload(self):
+        """Perform preloading of the model from the store"""
+        super()._preload()
+        self.featurizer.model.to(self.device)
+        self.featurizer.max_length = self.max_length
+
+        # we can be confident that the model has been loaded here
+        if self._pooling_obj is not None and self.preload:
+            return
+        config = self.featurizer.model.config.to_dict()
+        cur_tokenizer = self.featurizer.tokenizer
+        for special_token_id_name in [
+            "pad_token_id",
+            "bos_token_id",
+            "eos_token_id",
+            "unk_token_id",
+            "sep_token_id",
+            "mask_token_id",
+        ]:
+            token_id = getattr(cur_tokenizer, special_token_id_name)
+            if token_id is not None:
+                config[special_token_id_name] = token_id
+
+        self._pooling_obj = (
+            get_default_hgf_pooler(self.pooling, config, random_seed=self.random_seed)
+            if self._pooling_obj is None
+            else self._pooling_obj
+        )
+        # pooling layer is still none, that means we could not fetch it properly
+        if self._pooling_obj is None:
+            logger.warning(
+                "Cannot confidently find the pooling layer and therefore will not apply pooling"
+            )
+
+    def _convert(self, inputs: list, **kwargs):
+        """Convert the list of molecules to the right format for embedding
+
+        Args:
+            inputs: inputs to preprocess
+
+        Returns:
+            processed: pre-processed input list
+        """
+        self._preload()
+
+        if isinstance(inputs, (str, dm.Mol)):
+            inputs = [inputs]
+
+        def _to_smiles(x):
+            return dm.to_smiles(x) if not isinstance(x, str) else x
+
+        parallel_kwargs = getattr(self, "parallel_kwargs", {})
+
+        if len(inputs) > 1:
+            smiles = dm.utils.parallelized(
+                _to_smiles,
+                inputs,
+                n_jobs=self.n_jobs,
+                **parallel_kwargs,
+            )
+            inputs = dm.utils.parallelized(
+                self.converter.encode,
+                smiles,
+                n_jobs=self.n_jobs,
+                **parallel_kwargs,
+            )
+        else:
+            inputs = self.converter.encode(_to_smiles(inputs[0]))
+        # this check is necessary for some tokenizers
+        if isinstance(inputs, str):
+            inputs = [inputs]
+        encoded = self.featurizer.tokenizer(
+            list(inputs),
+            truncation=True,
+            padding=True,
+            max_length=self.max_length,
+            return_tensors="pt",
+        )
+        return encoded
+
+    def _embed(self, inputs, **kwargs):
+        """
+        Perform embedding of inputs using the pretrained model
+
+        Args:
+            inputs: smiles or seqs
+            kwargs: any additional parameters
+        """
+        self._preload()
+
+        # Move inputs to the correct device
+        inputs = {key: value.to(self.device) for key, value in inputs.items()}
+
+        attention_mask = inputs.get("attention_mask", None)
+        if attention_mask is not None and self.ignore_padding:
+            attention_mask = attention_mask.unsqueeze(-1).to(self.device)  # B, S, 1
+        else:
+            attention_mask = None
+        with torch.no_grad():
+            if (
+                isinstance(self.featurizer.model, EncoderDecoderModel)
+                or hasattr(self.featurizer.model, "encoder")
+            ) and self.prefer_encoder:
+                out_dict = self.featurizer.model.encoder(output_hidden_states=True, **inputs)
+            else:
+                out_dict = self.featurizer.model(output_hidden_states=True, **inputs)
+            hidden_state = out_dict["hidden_states"]
+            emb_layers = []
+            for layer in self.concat_layers:
+                emb = hidden_state[layer].detach()  # B, S, D
+                emb = self._pooling_obj(
+                    emb,
+                    inputs["input_ids"],
+                    mask=attention_mask,
+                    ignore_padding=self.ignore_padding,
+                )
+                emb_layers.append(emb)
+            emb = torch.cat(emb_layers, dim=1)
+        return emb.cpu().numpy()  # Move the final tensor to CPU before converting to numpy array
+
+    def set_max_length(self, max_length: int):
+        """Set the maximum length for this featurizer"""
+        self.max_length = max_length
+        self._preload()
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __init__(kind='ChemBERTa-77M-MLM', notation='none', pooling='mean', concat_layers=-1, prefer_encoder=True, dtype=np.float32, device='cpu', max_length=128, ignore_padding=True, preload=False, n_jobs=0, random_seed=None, **params) + +

+ + +
+ +

HuggingFace Transformer for featurizer extraction +The default behaviour of this feature extractor is to return the last hidden state of the encoder +similar to what is performed by the pipeline 'feature-extraction' in hugging face.

+
+

Warning

+

For bert models, the default pooling layers is a neural network. Therefore, do not use the default +Or provide a random seed for reproducibility (in this case pooling will act as random projection to the same manifold)

+
+
+

Note

+

The pooling module of this featurizer is accessible through the _pooling_obj attribute.

+
+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
kind + Union[str, HFModel] + +
+

name of the featurizer as available in the model store

+
+
+ 'ChemBERTa-77M-MLM' +
notation + Optional[str] + +
+

optional line notation to use. Only use if it cannot be found from the model card.

+
+
+ 'none' +
pooling + str + +
+

type of pooling to use. One of ['default', 'mean', 'max', 'sum', 'clf', ]. The value "default" corresponds to the default litterature pooling for each model type. +See molfeat.utils.pooler.get_default_hf_pooler for more details.

+
+
+ 'mean' +
concat_layers + Union[List[int], int] + +
+

Layer to concat to get the representation. By default the last hidden layer is returned.

+
+
+ -1 +
prefer_encoder + bool + +
+

For an encoder-decoder model, prefer the embeddings provided by the encoder.

+
+
+ True +
dtype + +
+

Data type to output

+
+
+ float32 +
device + +
+

Torch device on which to run the featurizer.

+
+
+ 'cpu' +
max_length + int + +
+

Maximum length of the input sequence to consider. Please update this for large sequences

+
+
+ 128 +
ignore_padding + bool + +
+

Whether to ignore padding in the representation (default: True) to avoid effect of batching

+
+
+ True +
preload + bool + +
+

Whether to preload the model into memory or not

+
+
+ False +
n_jobs + int + +
+

number of jobs to use

+
+
+ 0 +
random_seed + Optional[int] + +
+

random seed to use for reproducibility whenever a DNN pooler is used (e.g bert/roberta)

+
+
+ None +
+ +
+ Source code in molfeat/trans/pretrained/hf_transformers.py +
228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
def __init__(
+    self,
+    kind: Union[str, HFModel] = "ChemBERTa-77M-MLM",
+    notation: Optional[str] = "none",
+    pooling: str = "mean",
+    concat_layers: Union[List[int], int] = -1,
+    prefer_encoder: bool = True,
+    dtype=np.float32,
+    device="cpu",
+    max_length: int = 128,
+    ignore_padding: bool = True,
+    preload: bool = False,
+    n_jobs: int = 0,
+    random_seed: Optional[int] = None,
+    **params,
+):
+    """
+    HuggingFace Transformer for featurizer extraction
+    The default behaviour of this feature extractor is to return the last hidden state of the encoder
+    similar to what is performed by the pipeline 'feature-extraction' in hugging face.
+
+    !!! warning
+        For bert models, the default pooling layers is a neural network. Therefore, do not use the default
+        Or provide a random seed for reproducibility (in this case pooling will act as random projection to the same manifold)
+
+    !!! note
+        The pooling module of this featurizer is accessible through the `_pooling_obj` attribute.
+
+    Args:
+        kind: name of the featurizer as available in the model store
+        notation: optional line notation to use. Only use if it cannot be found from the model card.
+        pooling: type of pooling to use. One of ['default', 'mean', 'max', 'sum', 'clf', ]. The value "default" corresponds to the default litterature pooling for each model type.
+            See `molfeat.utils.pooler.get_default_hf_pooler` for more details.
+        concat_layers: Layer to concat to get the representation. By default the last hidden layer is returned.
+        prefer_encoder: For an encoder-decoder model, prefer the embeddings provided by the encoder.
+        dtype: Data type to output
+        device: Torch device on which to run the featurizer.
+        max_length: Maximum length of the input sequence to consider. Please update this for large sequences
+        ignore_padding: Whether to ignore padding in the representation (default: True) to avoid effect of batching
+        preload: Whether to preload the model into memory or not
+        n_jobs: number of jobs to use
+        random_seed: random seed to use for reproducibility whenever a DNN pooler is used (e.g bert/roberta)
+    """
+
+    if not requires.check("transformers"):
+        raise ValueError(
+            "Cannot find transformers and/or tokenizers. It's required for this featurizer !"
+        )
+
+    super().__init__(
+        dtype=dtype,
+        device=device,
+        n_jobs=n_jobs,
+        **params,
+    )
+    if concat_layers is None:
+        concat_layers = -1
+    if not isinstance(concat_layers, list):
+        concat_layers = [concat_layers]
+    self.concat_layers = concat_layers
+    self.max_length = max_length
+    self.ignore_padding = ignore_padding
+    self._require_mols = False
+    self.random_seed = random_seed
+    self.preload = preload
+    self.pooling = pooling
+    self.prefer_encoder = prefer_encoder
+    self.device = torch.device(device)
+    self._pooling_obj = None
+    if isinstance(kind, HFModel):
+        self.kind = kind.name
+        self.featurizer = kind
+    else:
+        self.kind = kind
+        self.featurizer = HFModel(name=self.kind)
+    self.notation = self.featurizer.get_notation(notation) or "none"
+    self.converter = SmilesConverter(self.notation)
+    if self.preload:
+        self._preload()
+
+
+
+ +
+ + +
+ + + + +

+ set_max_length(max_length) + +

+ + +
+ +

Set the maximum length for this featurizer

+ +
+ Source code in molfeat/trans/pretrained/hf_transformers.py +
444
+445
+446
+447
def set_max_length(self, max_length: int):
+    """Set the maximum length for this featurizer"""
+    self.max_length = max_length
+    self._preload()
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.trans.struct.html b/0.9.7/api/molfeat.trans.struct.html new file mode 100644 index 0000000..47e3f90 --- /dev/null +++ b/0.9.7/api/molfeat.trans.struct.html @@ -0,0 +1,4090 @@ + + + + + + + + + + + + + + + + + + + + + + + + + molfeat.trans.struct - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

molfeat.trans.struct

+ +

ESM

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ ESMProteinFingerprint + + +

+ + +
+

+ Bases: MoleculeTransformer

+ + +

ESM (Evolutionary Scale Modeling) protein representation embedding. +ESM is a transformer protein language model introduced by Facebook FAIR in Rives et al., 2019: +'Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences'

+ +
+ Source code in molfeat/trans/struct/esm.py +
 15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
class ESMProteinFingerprint(MoleculeTransformer):
+    """
+    ESM (Evolutionary Scale Modeling) protein representation embedding.
+    ESM is a transformer protein language model introduced by Facebook FAIR in Rives et al., 2019:
+    'Biological structure and function emerge from scaling unsupervised learning to 250 million protein sequences'
+    """
+
+    def __init__(
+        self,
+        featurizer: str = "esm1b_t33_650M_UR50S",
+        loader_repo_or_dir: str = "facebookresearch/esm:main",
+        device: Optional[str] = None,
+        layers: List[int] = None,
+        pooling: str = "mean",
+        dtype: Callable = None,
+        contact: bool = False,
+        **kwargs,
+    ):
+        """Constructor for ESM protein representation
+
+        Args:
+            featurizer: Name of the ESM model to use. Defaults to "esm1b_t33_650M_UR50S".
+            loader_repo_or_dir: Path to local dir containing the model or to a github repo. Default to "facebookresearch/esm:main
+            device: Torch device to move the model to. Defaults to None.
+            layers: Layers to use to extract information. Defaults to None, which is the last layers.
+            pooling: Pooling method to use for sequence embedding. Defaults to "mean".
+                If you set pooling to None, token representation will be returned (excluding BOS)
+            dtype: Representation output datatype. Defaults to None.
+            contact: Whether to return the predictied attention contact instead of the representation. Defaults to False.
+        """
+        self._model_name = featurizer
+        self.device = device
+        self.dtype = dtype
+        self.featurizer = None
+        self.alphabet = None
+        self.batch_converter = None
+        self._fitted = True
+        self.cols_to_keep = None
+        self.repr_layers = layers
+        self.repo_or_dir = loader_repo_or_dir
+        self.contact = contact
+        max_layer_pattern = re.compile(".*_t([0-9]+)_.*")
+        self._max_layers = int(max_layer_pattern.match(featurizer).group(1))
+        if layers is None:
+            self.repr_layers = [self._max_layers]
+        if any(lay > self._max_layers for lay in self.repr_layers):
+            raise ValueError(
+                "You are requesting more layers than available for this pretrained model"
+            )
+        self._representation = "seq"
+        self.pooling = Pooling(dim=0, name=pooling)
+        if pooling is None:
+            self._representation = "token"
+        self._feat_length = None
+        self._load_model()
+
+    def _load_model(self):
+        """Load model internally"""
+        self.featurizer, self.alphabet = torch.hub.load(self.repo_or_dir, self._model_name)  # type: ignore
+        self.batch_converter = self.alphabet.get_batch_converter()
+        if self.device is not None:
+            self.featurizer = self.featurizer.to(self.device)
+        self.featurizer.eval()
+
+    def __len__(self):
+        """Get featurizer length"""
+        if self._feat_length is None and not self.contact:
+            embds = self._transform("MMMM")
+            self._feat_length = embds.shape[-1]
+        return self._feat_length
+
+    @property
+    def n_layers(self):
+        """Number of layers used in the current embeddings"""
+        return len(self.repr_layers)
+
+    @torch.no_grad()
+    def _embed(self, prot_seqs: List[str], prot_names: Optional[List[str]] = None, **kwargs):
+        r"""
+        Compute features for a single molecule.
+        This method would potentially need to be reimplemented by child classes
+
+        Args:
+           prot_seqs: protein sequences as a sequence of amino acids
+           prot_names: protein names
+
+        Returns
+            feat: list of N_SEQ representation, each of size (SEQ_LEN, FEAT_DIM * N_LAYERS) for token embeddings
+                and (FEAT_DIM * N_LAYERS) for sequence embeddings. Note that SEQ_LEN will include the stop token.
+
+        """
+        if isinstance(prot_seqs, str):
+            prot_seqs = [prot_seqs]
+        if prot_names is None:
+            prot_names = ["protein_{i}" for i in range(len(prot_seqs))]
+        if isinstance(prot_names, str):
+            prot_names = [prot_names]
+        if len(prot_seqs) != len(prot_names):
+            raise ValueError("Must provide the same number of protein sequence and label")
+        data = list(zip(prot_names, prot_seqs))
+        *_, batch_tokens = self.batch_converter(data)
+        if self.device is not None:
+            batch_tokens = batch_tokens.to(self.device)
+
+        results = self.featurizer(
+            batch_tokens, repr_layers=self.repr_layers, return_contacts=self.contact
+        )
+        embeddings = []
+        if self.contact:
+            for _, (seq, att_concats) in enumerate(zip(prot_seqs, results["contacts"])):
+                embeddings.append(att_concats[: len(seq), : len(seq)])
+        else:
+            representation = torch.stack(
+                [results["representations"][x] for x in self.repr_layers], dim=-1
+            )
+            if self._representation.startswith("seq"):
+                for seq, token_rep in zip(prot_seqs, representation):
+                    embeddings.append(
+                        self.pooling(token_rep[1 : len(seq) + 1]).view(1, -1).squeeze(0)
+                    )
+            else:
+                embeddings = list(
+                    representation.view(representation.shape[0], representation.shape[1], -1)
+                )
+        return embeddings
+
+    def __repr__(self):
+        return "{}(model={}, pooling={}, dtype={})".format(
+            self.__class__.__name__,
+            _parse_to_evaluable_str(self._model_name),
+            _parse_to_evaluable_str(self.pooling.name),
+            _parse_to_evaluable_str(self.dtype),
+        )
+
+    def _transform(self, protein_seq: str, protein_name: str = None):
+        """
+        Transform a protein sequence into a feature vector.
+
+        Args:
+            protein: protein sequence as amino acid sequences
+            protein_name: protein name
+
+        Returns:
+            Embedding of size (SEQ_LEN, FEAT_DIM, N_LAYERS) for token embeddings
+                and (FEAT_DIM * N_LAYERS) for sequence embeddings
+        """
+        return self._embed(protein_seq, protein_name)[0]
+
+    def transform(self, seqs: List[str], names: Optional[List[str]] = None, **kwargs):
+        """
+        Transform a list of protein sequence into a feature vector.
+
+        Args:
+            seqs: list of protein sequence as amino acids
+            names: protein names
+
+        Returns:
+            Embedding of size (N_SEQS, SEQ_LEN, FEAT_DIM * N_LAYERS) for token embeddings
+                and (N_SEQS, FEAT_DIM * N_LAYERS) for sequence embeddings. Use
+        """
+        if (
+            names is None
+            and isinstance(seqs, list)
+            and isinstance(seqs[0], list)
+            and len(seqs[0]) == 2
+        ):
+            names, seqs = zip(*seqs)
+            seqs = list(seqs)
+            names = list(names)
+        return self._embed(seqs, names)
+
+    def __call__(
+        self,
+        seqs: List[str],
+        names: Optional[List[str]] = None,
+        ignore_errors: bool = False,
+        enforce_dtype: bool = True,
+        **kwargs,
+    ):
+        r"""
+        Compute molecular representation of a protein sequence.
+        If ignore_error is True, a list of features and valid ids are returned.
+
+        Args:
+            seqs: list of protein sequence as amino acids
+            names: protein names
+            enforce_dtype: whether to enforce the instance dtype in the generated fingerprint
+            ignore_errors: Whether to ignore errors during featurization or raise an error.
+            kwargs: Named parameters for the transform method
+
+        Returns:
+            feats: list of valid embeddings
+            ids: all valid positions that did not failed during featurization.
+                Only returned when ignore_errors is True.
+
+        """
+        features = self.transform(seqs, names, ignore_errors=ignore_errors, **kwargs)
+        ids = np.arange(len(features))
+        if ignore_errors:
+            features, ids = self._filter_none(features)
+        if self.dtype is not None and enforce_dtype:
+            if self.contact or not self._representation.startswith("seq"):
+                features = [
+                    datatype.cast(feat, dtype=self.dtype, columns=self.columns) for feat in features
+                ]
+            else:
+                features = datatype.cast(features, dtype=self.dtype, columns=self.columns)
+        if not ignore_errors:
+            return features
+        return features, ids
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ n_layers + + + property + + +

+ + +
+ +

Number of layers used in the current embeddings

+
+ +
+ + + + +
+ + + + +

+ __call__(seqs, names=None, ignore_errors=False, enforce_dtype=True, **kwargs) + +

+ + +
+ +

Compute molecular representation of a protein sequence. +If ignore_error is True, a list of features and valid ids are returned.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
seqs + List[str] + +
+

list of protein sequence as amino acids

+
+
+ required +
names + Optional[List[str]] + +
+

protein names

+
+
+ None +
enforce_dtype + bool + +
+

whether to enforce the instance dtype in the generated fingerprint

+
+
+ True +
ignore_errors + bool + +
+

Whether to ignore errors during featurization or raise an error.

+
+
+ False +
kwargs + +
+

Named parameters for the transform method

+
+
+ {} +
+ + + +

Returns:

+ + + + + + + + + + + + + + + + + +
Name TypeDescription
feats + +
+

list of valid embeddings

+
+
ids + +
+

all valid positions that did not failed during featurization. +Only returned when ignore_errors is True.

+
+
+ +
+ Source code in molfeat/trans/struct/esm.py +
186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
def __call__(
+    self,
+    seqs: List[str],
+    names: Optional[List[str]] = None,
+    ignore_errors: bool = False,
+    enforce_dtype: bool = True,
+    **kwargs,
+):
+    r"""
+    Compute molecular representation of a protein sequence.
+    If ignore_error is True, a list of features and valid ids are returned.
+
+    Args:
+        seqs: list of protein sequence as amino acids
+        names: protein names
+        enforce_dtype: whether to enforce the instance dtype in the generated fingerprint
+        ignore_errors: Whether to ignore errors during featurization or raise an error.
+        kwargs: Named parameters for the transform method
+
+    Returns:
+        feats: list of valid embeddings
+        ids: all valid positions that did not failed during featurization.
+            Only returned when ignore_errors is True.
+
+    """
+    features = self.transform(seqs, names, ignore_errors=ignore_errors, **kwargs)
+    ids = np.arange(len(features))
+    if ignore_errors:
+        features, ids = self._filter_none(features)
+    if self.dtype is not None and enforce_dtype:
+        if self.contact or not self._representation.startswith("seq"):
+            features = [
+                datatype.cast(feat, dtype=self.dtype, columns=self.columns) for feat in features
+            ]
+        else:
+            features = datatype.cast(features, dtype=self.dtype, columns=self.columns)
+    if not ignore_errors:
+        return features
+    return features, ids
+
+
+
+ +
+ + +
+ + + + +

+ __init__(featurizer='esm1b_t33_650M_UR50S', loader_repo_or_dir='facebookresearch/esm:main', device=None, layers=None, pooling='mean', dtype=None, contact=False, **kwargs) + +

+ + +
+ +

Constructor for ESM protein representation

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
featurizer + str + +
+

Name of the ESM model to use. Defaults to "esm1b_t33_650M_UR50S".

+
+
+ 'esm1b_t33_650M_UR50S' +
loader_repo_or_dir + str + +
+

Path to local dir containing the model or to a github repo. Defaults to "facebookresearch/esm:main".

+
+
+ 'facebookresearch/esm:main' +
device + Optional[str] + +
+

Torch device to move the model to. Defaults to None.

+
+
+ None +
layers + List[int] + +
+

Layers to use to extract information. Defaults to None, which uses the last layer.

+
+
+ None +
pooling + str + +
+

Pooling method to use for sequence embedding. Defaults to "mean". +If you set pooling to None, token representation will be returned (excluding BOS)

+
+
+ 'mean' +
dtype + Callable + +
+

Representation output datatype. Defaults to None.

+
+
+ None +
contact + bool + +
+

Whether to return the predicted attention contact instead of the representation. Defaults to False.

+
+
+ False +
+ +
+ Source code in molfeat/trans/struct/esm.py +
22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
def __init__(
+    self,
+    featurizer: str = "esm1b_t33_650M_UR50S",
+    loader_repo_or_dir: str = "facebookresearch/esm:main",
+    device: Optional[str] = None,
+    layers: List[int] = None,
+    pooling: str = "mean",
+    dtype: Callable = None,
+    contact: bool = False,
+    **kwargs,
+):
+    """Constructor for ESM protein representation
+
+    Args:
+        featurizer: Name of the ESM model to use. Defaults to "esm1b_t33_650M_UR50S".
+        loader_repo_or_dir: Path to local dir containing the model or to a github repo. Default to "facebookresearch/esm:main
+        device: Torch device to move the model to. Defaults to None.
+        layers: Layers to use to extract information. Defaults to None, which is the last layers.
+        pooling: Pooling method to use for sequence embedding. Defaults to "mean".
+            If you set pooling to None, token representation will be returned (excluding BOS)
+        dtype: Representation output datatype. Defaults to None.
+        contact: Whether to return the predictied attention contact instead of the representation. Defaults to False.
+    """
+    self._model_name = featurizer
+    self.device = device
+    self.dtype = dtype
+    self.featurizer = None
+    self.alphabet = None
+    self.batch_converter = None
+    self._fitted = True
+    self.cols_to_keep = None
+    self.repr_layers = layers
+    self.repo_or_dir = loader_repo_or_dir
+    self.contact = contact
+    max_layer_pattern = re.compile(".*_t([0-9]+)_.*")
+    self._max_layers = int(max_layer_pattern.match(featurizer).group(1))
+    if layers is None:
+        self.repr_layers = [self._max_layers]
+    if any(lay > self._max_layers for lay in self.repr_layers):
+        raise ValueError(
+            "You are requesting more layers than available for this pretrained model"
+        )
+    self._representation = "seq"
+    self.pooling = Pooling(dim=0, name=pooling)
+    if pooling is None:
+        self._representation = "token"
+    self._feat_length = None
+    self._load_model()
+
+
+
+ +
+ + +
+ + + + +

+ __len__() + +

+ + +
+ +

Get featurizer length

+ +
+ Source code in molfeat/trans/struct/esm.py +
79
+80
+81
+82
+83
+84
def __len__(self):
+    """Get featurizer length"""
+    if self._feat_length is None and not self.contact:
+        embds = self._transform("MMMM")
+        self._feat_length = embds.shape[-1]
+    return self._feat_length
+
+
+
+ +
+ + +
+ + + + +

+ transform(seqs, names=None, **kwargs) + +

+ + +
+ +

Transform a list of protein sequence into a feature vector.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
seqs + List[str] + +
+

list of protein sequence as amino acids

+
+
+ required +
names + Optional[List[str]] + +
+

protein names

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

Embedding of size (N_SEQS, SEQ_LEN, FEAT_DIM * N_LAYERS) for token embeddings +and (N_SEQS, FEAT_DIM * N_LAYERS) for sequence embeddings.

+
+
+ +
+ Source code in molfeat/trans/struct/esm.py +
163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
def transform(self, seqs: List[str], names: Optional[List[str]] = None, **kwargs):
+    """
+    Transform a list of protein sequence into a feature vector.
+
+    Args:
+        seqs: list of protein sequence as amino acids
+        names: protein names
+
+    Returns:
+        Embedding of size (N_SEQS, SEQ_LEN, FEAT_DIM * N_LAYERS) for token embeddings
+            and (N_SEQS, FEAT_DIM * N_LAYERS) for sequence embeddings. Use
+    """
+    if (
+        names is None
+        and isinstance(seqs, list)
+        and isinstance(seqs[0], list)
+        and len(seqs[0]) == 2
+    ):
+        names, seqs = zip(*seqs)
+        seqs = list(seqs)
+        names = list(names)
+    return self._embed(seqs, names)
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +

+

Bio Embeddings

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ ProtBioFingerprint + + +

+ + +
+

+ Bases: MoleculeTransformer

+ + +

Wrapper for general purpose biological sequence representations, as provided by bio_embeddings

+

For a list of available embeddings, see: https://docs.bioembeddings.com/v0.2.2/api/bio_embeddings.embed.html

+

!!! note: + The embeddings proposed here are the general purpose embeddings, meaning that task-specific + embeddings offered by bio_embeddings (e.g PBTucker, DeepBlast) are not included.

+
According to the bio_embeddings documentation, `prottrans_bert_bfd` and `seqvec` are the best embeddings.
+
+ +
+ Source code in molfeat/trans/struct/prot1D.py +
 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ 37
+ 38
+ 39
+ 40
+ 41
+ 42
+ 43
+ 44
+ 45
+ 46
+ 47
+ 48
+ 49
+ 50
+ 51
+ 52
+ 53
+ 54
+ 55
+ 56
+ 57
+ 58
+ 59
+ 60
+ 61
+ 62
+ 63
+ 64
+ 65
+ 66
+ 67
+ 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
class ProtBioFingerprint(MoleculeTransformer):
+    """
+    Wrapper for general purpose biological sequence representations, as provided by [`bio_embeddings`](https://github.com/sacdallago/bio_embeddings)
+
+    For a list of available embeddings, see: https://docs.bioembeddings.com/v0.2.2/api/bio_embeddings.embed.html
+
+    !!! note:
+        The embeddings proposed here are the general purpose embeddings, meaning that task-specific
+        embeddings offered by `bio_embeddings` (e.g PBTucker, DeepBlast) are not included.
+
+        According to the bio_embeddings documentation, `prottrans_bert_bfd` and `seqvec` are the best embeddings.
+    """
+
+    SUPPORTED_EMBEDDINGS = [
+        "bepler",
+        "cpcprot",
+        "esm",
+        "esm1b",
+        "esm1v",
+        "fasttext",
+        "glove",
+        "one_hot_encoding",
+        "plus_rnn",
+        "prottrans_albert_bfd",
+        "prottrans_bert_bfd",
+        "prottrans_t5_bfd",
+        "prottrans_t5_uniref50",
+        "prottrans_t5_xl_u50",
+        "prottrans_xlnet_uniref100",
+        "seqvec",
+        "unirep",
+        "word2vec",
+    ]
+
+    def __init__(
+        self,
+        featurizer: Union[str, Callable] = "seqvec",
+        pooling: str = "mean",
+        dtype: Callable = np.float32,
+        device: Optional[Union[torch.device, str]] = None,
+        layer_pooling: str = "sum",
+        **kwargs,
+    ):
+        """Constructor for Deep Learning based Protein representation.
+        SeqVec featurizer will e
+
+        Args:
+            featurizer: Name of callable of the embedding model
+            pooling: Pooling method to use for sequence embedding. Defaults to "mean".
+                If you set pooling to None, token representation will be returned
+            dtype: Representation output datatype. Defaults to None.
+            device: Torch device to move the model to. Defaults to None.
+            layer_pooling: Layer-wise pooling method to use when > 1 layer exists. Default to 'sum'.
+                If None, last layers is taken. This is relevant for `seqvec` mostly
+        """
+        if not requires.check("bio_embeddings"):
+            raise ValueError(
+                "Cannot use this featurizer without bio_embeddings (pip install 'bio_embeddings[all]')."
+            )
+
+        if isinstance(featurizer, bio_embedder.EmbedderInterface):
+            featurizer = featurizer
+            self._model_name = self.featurizer.name
+        else:
+            if (
+                not isinstance(featurizer, str)
+                or featurizer.lower() not in self.SUPPORTED_EMBEDDINGS
+            ):
+                raise ValueError("Unknown featurizer: {}".format(featurizer))
+            self._model_name = featurizer.lower()
+            featurizer = bio_embedder.name_to_embedder[self._model_name](device=device, **kwargs)
+
+        super().__init__(featurizer=featurizer, dtype=dtype, **kwargs)
+        self._fitted = True
+        self._representation = "seq"
+        self.pooling = Pooling(dim=0, name=pooling)
+        self.layer_pooling = Pooling(dim=0, name=layer_pooling)
+        if pooling is None:
+            self._representation = "token"
+        self._feat_length = None
+
+    def __len__(self):
+        """Get featurizer length"""
+        return self.featurizer.embedding_dimension
+
+    @property
+    def n_layers(self):
+        """Get the number of layers used in this embedding"""
+        return self.featurizer.number_of_layers
+
+    def __repr__(self):
+        return "{}(model={}, pooling={}, dtype={})".format(
+            self.__class__.__name__,
+            _parse_to_evaluable_str(self._model_name),
+            _parse_to_evaluable_str(self.pooling.name),
+            _parse_to_evaluable_str(self.dtype),
+        )
+
+    def _pool(self, embedding: list):
+        """Perform embedding pooling
+        Args:
+            embedding: input embedding
+        """
+        if self.n_layers > 1 and self.layer_pooling.name is not None:
+            embedding = self.layer_pooling(embedding)
+        if len(embedding.shape) > 2:
+            # we forcefully take the last layers
+            embedding = embedding[-1]
+        return self.pooling(embedding)
+
+    def _transform(
+        self,
+        protein_seq: str,
+        **kwargs,
+    ):
+        """
+        Transform a protein/nucleotide sequence into a feature vector.
+
+        Args:
+            protein: protein sequence as amino acid sequences
+
+        Returns:
+            Embedding of size (FEAT_DIM, N_LAYERS) for token embeddings
+                and (FEAT_DIM, N_LAYERS) for sequence embeddings
+        """
+
+        rep = self.featurizer.embed(protein_seq)
+        return self._pool(rep)
+
+    def transform(self, seqs: List[str], names: Optional[List[str]] = None, **kwargs):
+        """
+        Transform a list of protein/nucleotide sequence into a feature vector.
+
+        Args:
+            seqs: list of protein/nucleotide sequence as amino acids
+            names: names of the macromolecules.  Will be ignored
+            kwargs: additional arguments for the featurizer
+
+        Returns:
+            Embedding of size (N_SEQS, FEAT_DIM) for token embeddings
+                and (FEAT_DIM, N_LAYERS) for sequence embeddings
+        """
+        if not isinstance(seqs, list):
+            seqs = [seqs]
+        if isinstance(seqs[0], (list, tuple)) and len(seqs[0]) == 2:
+            _, seqs = zip(*seqs)
+            seqs = list(seqs)
+        res = list(self.featurizer.embed_many(seqs, **kwargs))
+        res = [self._pool(x) for x in res]
+        return res
+
+    def __call__(
+        self,
+        seqs: List[str],
+        ignore_errors: bool = False,
+        enforce_dtype: bool = True,
+        **kwargs,
+    ):
+        r"""
+        Compute molecular representation of a protein sequence.
+        If ignore_error is True, a list of features and valid ids are returned.
+
+        Args:
+            seqs: list of protein or nucleotide sequence as amino acids
+            enforce_dtype: whether to enforce the instance dtype in the generated fingerprint
+            ignore_errors: Whether to ignore errors during featurization or raise an error.
+            kwargs: Named parameters for the transform method
+
+        Returns:
+            feats: list of valid embeddings
+            ids: all valid positions that did not failed during featurization.
+                Only returned when ignore_errors is True.
+
+        """
+        features = self.transform(seqs, **kwargs)
+        ids = np.arange(len(features))
+        if ignore_errors:
+            features, ids = self._filter_none(features)
+        if self.dtype is not None and enforce_dtype:
+            if self._representation.startswith("token"):
+                features = [
+                    datatype.cast(feat, dtype=self.dtype, columns=self.columns) for feat in features
+                ]
+            else:
+                features = datatype.cast(features, dtype=self.dtype, columns=self.columns)
+        if not ignore_errors:
+            return features
+        return features, ids
+
+
+ + + +
+ + + + + + + +
+ + + + +

+ n_layers + + + property + + +

+ + +
+ +

Get the number of layers used in this embedding

+
+ +
+ + + + +
+ + + + +

+ __call__(seqs, ignore_errors=False, enforce_dtype=True, **kwargs) + +

+ + +
+ +

Compute molecular representation of a protein sequence. +If ignore_errors is True, a list of features and valid ids are returned.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
seqs + List[str] + +
+

list of protein or nucleotide sequence as amino acids

+
+
+ required +
enforce_dtype + bool + +
+

whether to enforce the instance dtype in the generated fingerprint

+
+
+ True +
ignore_errors + bool + +
+

Whether to ignore errors during featurization or raise an error.

+
+
+ False +
kwargs + +
+

Named parameters for the transform method

+
+
+ {} +
+ + + +

Returns:

+ + + + + + + + + + + + + + + + + +
Name TypeDescription
feats + +
+

list of valid embeddings

+
+
ids + +
+

all valid positions that did not fail during featurization. +Only returned when ignore_errors is True.

+
+
+ +
+ Source code in molfeat/trans/struct/prot1D.py +
169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
def __call__(
+    self,
+    seqs: List[str],
+    ignore_errors: bool = False,
+    enforce_dtype: bool = True,
+    **kwargs,
+):
+    r"""
+    Compute molecular representation of a protein sequence.
+    If ignore_error is True, a list of features and valid ids are returned.
+
+    Args:
+        seqs: list of protein or nucleotide sequence as amino acids
+        enforce_dtype: whether to enforce the instance dtype in the generated fingerprint
+        ignore_errors: Whether to ignore errors during featurization or raise an error.
+        kwargs: Named parameters for the transform method
+
+    Returns:
+        feats: list of valid embeddings
+        ids: all valid positions that did not failed during featurization.
+            Only returned when ignore_errors is True.
+
+    """
+    features = self.transform(seqs, **kwargs)
+    ids = np.arange(len(features))
+    if ignore_errors:
+        features, ids = self._filter_none(features)
+    if self.dtype is not None and enforce_dtype:
+        if self._representation.startswith("token"):
+            features = [
+                datatype.cast(feat, dtype=self.dtype, columns=self.columns) for feat in features
+            ]
+        else:
+            features = datatype.cast(features, dtype=self.dtype, columns=self.columns)
+    if not ignore_errors:
+        return features
+    return features, ids
+
+
+
+ +
+ + +
+ + + + +

+ __init__(featurizer='seqvec', pooling='mean', dtype=np.float32, device=None, layer_pooling='sum', **kwargs) + +

+ + +
+ +

Constructor for Deep Learning based Protein representation. +SeqVec featurizer will e

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
featurizer + Union[str, Callable] + +
+

Name or callable of the embedding model

+
+
+ 'seqvec' +
pooling + str + +
+

Pooling method to use for sequence embedding. Defaults to "mean". +If you set pooling to None, token representation will be returned

+
+
+ 'mean' +
dtype + Callable + +
+

Representation output datatype. Defaults to np.float32.

+
+
+ float32 +
device + Optional[Union[device, str]] + +
+

Torch device to move the model to. Defaults to None.

+
+
+ None +
layer_pooling + str + +
+

Layer-wise pooling method to use when > 1 layer exists. Defaults to 'sum'. +If None, the last layer is taken. This is relevant for seqvec mostly

+
+
+ 'sum' +
+ +
+ Source code in molfeat/trans/struct/prot1D.py +
52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
+88
+89
+90
+91
+92
+93
+94
+95
+96
+97
def __init__(
+    self,
+    featurizer: Union[str, Callable] = "seqvec",
+    pooling: str = "mean",
+    dtype: Callable = np.float32,
+    device: Optional[Union[torch.device, str]] = None,
+    layer_pooling: str = "sum",
+    **kwargs,
+):
+    """Constructor for Deep Learning based Protein representation.
+    SeqVec featurizer will e
+
+    Args:
+        featurizer: Name of callable of the embedding model
+        pooling: Pooling method to use for sequence embedding. Defaults to "mean".
+            If you set pooling to None, token representation will be returned
+        dtype: Representation output datatype. Defaults to None.
+        device: Torch device to move the model to. Defaults to None.
+        layer_pooling: Layer-wise pooling method to use when > 1 layer exists. Default to 'sum'.
+            If None, last layers is taken. This is relevant for `seqvec` mostly
+    """
+    if not requires.check("bio_embeddings"):
+        raise ValueError(
+            "Cannot use this featurizer without bio_embeddings (pip install 'bio_embeddings[all]')."
+        )
+
+    if isinstance(featurizer, bio_embedder.EmbedderInterface):
+        featurizer = featurizer
+        self._model_name = self.featurizer.name
+    else:
+        if (
+            not isinstance(featurizer, str)
+            or featurizer.lower() not in self.SUPPORTED_EMBEDDINGS
+        ):
+            raise ValueError("Unknown featurizer: {}".format(featurizer))
+        self._model_name = featurizer.lower()
+        featurizer = bio_embedder.name_to_embedder[self._model_name](device=device, **kwargs)
+
+    super().__init__(featurizer=featurizer, dtype=dtype, **kwargs)
+    self._fitted = True
+    self._representation = "seq"
+    self.pooling = Pooling(dim=0, name=pooling)
+    self.layer_pooling = Pooling(dim=0, name=layer_pooling)
+    if pooling is None:
+        self._representation = "token"
+    self._feat_length = None
+
+
+
+ +
+ + +
+ + + + +

+ __len__() + +

+ + +
+ +

Get featurizer length

+ +
+ Source code in molfeat/trans/struct/prot1D.py +
 99
+100
+101
def __len__(self):
+    """Get featurizer length"""
+    return self.featurizer.embedding_dimension
+
+
+
+ +
+ + +
+ + + + +

+ transform(seqs, names=None, **kwargs) + +

+ + +
+ +

Transform a list of protein/nucleotide sequence into a feature vector.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
seqs + List[str] + +
+

list of protein/nucleotide sequence as amino acids

+
+
+ required +
names + Optional[List[str]] + +
+

names of the macromolecules. Will be ignored

+
+
+ None +
kwargs + +
+

additional arguments for the featurizer

+
+
+ {} +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

Embedding of size (N_SEQS, FEAT_DIM) for token embeddings +and (FEAT_DIM, N_LAYERS) for sequence embeddings

+
+
+ +
+ Source code in molfeat/trans/struct/prot1D.py +
147
+148
+149
+150
+151
+152
+153
+154
+155
+156
+157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
def transform(self, seqs: List[str], names: Optional[List[str]] = None, **kwargs):
+    """
+    Transform a list of protein/nucleotide sequence into a feature vector.
+
+    Args:
+        seqs: list of protein/nucleotide sequence as amino acids
+        names: names of the macromolecules.  Will be ignored
+        kwargs: additional arguments for the featurizer
+
+    Returns:
+        Embedding of size (N_SEQS, FEAT_DIM) for token embeddings
+            and (FEAT_DIM, N_LAYERS) for sequence embeddings
+    """
+    if not isinstance(seqs, list):
+        seqs = [seqs]
+    if isinstance(seqs[0], (list, tuple)) and len(seqs[0]) == 2:
+        _, seqs = zip(*seqs)
+        seqs = list(seqs)
+    res = list(self.featurizer.embed_many(seqs, **kwargs))
+    res = [self._pool(x) for x in res]
+    return res
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.utils.html b/0.9.7/api/molfeat.utils.html new file mode 100644 index 0000000..d68de66 --- /dev/null +++ b/0.9.7/api/molfeat.utils.html @@ -0,0 +1,12413 @@ + + + + + + + + + + + + + + + + + + + + + + + + + molfeat.utils - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

molfeat.utils

+ +

Cache

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ CacheList + + +

+ + +
+ + +

Proxy for supporting search using a list of caches

+ +
+ Source code in molfeat/utils/cache.py +
689
+690
+691
+692
+693
+694
+695
+696
+697
+698
+699
+700
+701
+702
+703
+704
+705
+706
+707
+708
+709
+710
+711
+712
+713
+714
+715
+716
+717
+718
+719
+720
+721
+722
+723
+724
+725
+726
+727
+728
+729
+730
+731
+732
+733
+734
+735
+736
+737
+738
+739
+740
+741
+742
+743
+744
+745
+746
+747
+748
+749
+750
+751
+752
+753
+754
+755
+756
+757
+758
+759
+760
+761
+762
+763
+764
+765
+766
+767
+768
+769
+770
+771
+772
+773
+774
+775
+776
+777
+778
+779
+780
+781
+782
+783
+784
+785
class CacheList:
+    """Proxy for supporting search using a list of cache"""
+
+    def __init__(self, *caches):
+        self.caches = caches
+
+    def __getitem__(self, key):
+        for cache in self.caches:
+            val = cache.get(key)
+            if val is not None:
+                return val
+        raise KeyError(f"{key} not found in any cache")
+
+    def __contains__(self, key: Any):
+        """Check whether a key is in the cache
+        Args:
+            key: key to check in the cache
+        """
+        return any(key in cache for cache in self.caches)
+
+    def __len__(self):
+        """Return the length of the cache"""
+        return sum(len(c) for c in self.caches)
+
+    def __iter__(self):
+        """Iterate over all the caches"""
+        return itertools.chain(*iter(self.cache))
+
+    def __setitem__(self, key: Any, item: Any):
+        """Add an item to the cache
+
+        Args:
+            key: input key to set
+            item: value of the key to set
+        """
+        # select a random cache and add the item to the cache
+        cache = random.choice(self.caches)
+        cache.update({key: item})
+
+    def __call__(self, *args, **kwargs):
+        """
+        Compute the features for a list of molecules and save them to the cache
+        """
+
+        raise NotImplementedError(
+            "Dynamic updating of a cache list using a featurizer is not supported!"
+        )
+
+    def clear(self, *args, **kwargs):
+        """Clear all the caches and make them inaccesible"""
+        for cache in self.caches:
+            cache.clear(*args, **kwargs)
+
+    def update(self, new_cache: Mapping[Any, Any]):
+        cache = random.choice(self.caches)
+        cache.update(new_cache)
+
+    def get(self, key, default: Optional[Any] = None):
+        """Get the cached value for a specific key
+        Args:
+            key: key to get
+            default: default value to return when the key is not found
+        """
+        for cache in self.caches:
+            val = cache.get(key)
+            if val is not None:
+                return val
+        return default
+
+    def keys(self):
+        """Get list of keys in the cache"""
+        return list(itertools.chain(*(c.keys() for c in self.caches)))
+
+    def values(self):
+        """Get list of values in the cache"""
+        return list(itertools.chain(*(c.values() for c in self.caches)))
+
+    def items(self):
+        """Return iterator of key, values in the cache"""
+        return list(itertools.chain(*(c.items() for c in self.caches)))
+
+    def to_dict(self):
+        """Convert current cache to a dictionary"""
+        return dict(self.items())
+
+    def fetch(
+        self,
+        mols: List[Union[dm.Mol, str]],
+    ):
+        """Get the representation for a single
+
+        Args:
+            mols: list of molecules
+        """
+        if isinstance(mols, str) or not isinstance(mols, Iterable):
+            mols = [mols]
+        return [self.get(mol) for mol in mols]
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __call__(*args, **kwargs) + +

+ + +
+ +

Compute the features for a list of molecules and save them to the cache

+ +
+ Source code in molfeat/utils/cache.py +
728
+729
+730
+731
+732
+733
+734
+735
def __call__(self, *args, **kwargs):
+    """
+    Compute the features for a list of molecules and save them to the cache
+    """
+
+    raise NotImplementedError(
+        "Dynamic updating of a cache list using a featurizer is not supported!"
+    )
+
+
+
+ +
+ + +
+ + + + +

+ __contains__(key) + +

+ + +
+ +

Check whether a key is in the cache +Args: + key: key to check in the cache

+ +
+ Source code in molfeat/utils/cache.py +
702
+703
+704
+705
+706
+707
def __contains__(self, key: Any):
+    """Check whether a key is in the cache
+    Args:
+        key: key to check in the cache
+    """
+    return any(key in cache for cache in self.caches)
+
+
+
+ +
+ + +
+ + + + +

+ __iter__() + +

+ + +
+ +

Iterate over all the caches

+ +
+ Source code in molfeat/utils/cache.py +
713
+714
+715
def __iter__(self):
+    """Iterate over all the caches"""
+    return itertools.chain(*iter(self.cache))
+
+
+
+ +
+ + +
+ + + + +

+ __len__() + +

+ + +
+ +

Return the length of the cache

+ +
+ Source code in molfeat/utils/cache.py +
709
+710
+711
def __len__(self):
+    """Return the length of the cache"""
+    return sum(len(c) for c in self.caches)
+
+
+
+ +
+ + +
+ + + + +

+ __setitem__(key, item) + +

+ + +
+ +

Add an item to the cache

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
key + Any + +
+

input key to set

+
+
+ required +
item + Any + +
+

value of the key to set

+
+
+ required +
+ +
+ Source code in molfeat/utils/cache.py +
717
+718
+719
+720
+721
+722
+723
+724
+725
+726
def __setitem__(self, key: Any, item: Any):
+    """Add an item to the cache
+
+    Args:
+        key: input key to set
+        item: value of the key to set
+    """
+    # select a random cache and add the item to the cache
+    cache = random.choice(self.caches)
+    cache.update({key: item})
+
+
+
+ +
+ + +
+ + + + +

+ clear(*args, **kwargs) + +

+ + +
+ +

Clear all the caches and make them inaccessible

+ +
+ Source code in molfeat/utils/cache.py +
737
+738
+739
+740
def clear(self, *args, **kwargs):
+    """Clear all the caches and make them inaccesible"""
+    for cache in self.caches:
+        cache.clear(*args, **kwargs)
+
+
+
+ +
+ + +
+ + + + +

+ fetch(mols) + +

+ + +
+ +

Get the representation for a single molecule or a list of molecules

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + List[Union[Mol, str]] + +
+

list of molecules

+
+
+ required +
+ +
+ Source code in molfeat/utils/cache.py +
774
+775
+776
+777
+778
+779
+780
+781
+782
+783
+784
+785
def fetch(
+    self,
+    mols: List[Union[dm.Mol, str]],
+):
+    """Get the representation for a single
+
+    Args:
+        mols: list of molecules
+    """
+    if isinstance(mols, str) or not isinstance(mols, Iterable):
+        mols = [mols]
+    return [self.get(mol) for mol in mols]
+
+
+
+ +
+ + +
+ + + + +

+ get(key, default=None) + +

+ + +
+ +

Get the cached value for a specific key +Args: + key: key to get + default: default value to return when the key is not found

+ +
+ Source code in molfeat/utils/cache.py +
746
+747
+748
+749
+750
+751
+752
+753
+754
+755
+756
def get(self, key, default: Optional[Any] = None):
+    """Get the cached value for a specific key
+    Args:
+        key: key to get
+        default: default value to return when the key is not found
+    """
+    for cache in self.caches:
+        val = cache.get(key)
+        if val is not None:
+            return val
+    return default
+
+
+
+ +
+ + +
+ + + + +

+ items() + +

+ + +
+ +

Return list of (key, value) pairs in the cache

+ +
+ Source code in molfeat/utils/cache.py +
766
+767
+768
def items(self):
+    """Return iterator of key, values in the cache"""
+    return list(itertools.chain(*(c.items() for c in self.caches)))
+
+
+
+ +
+ + +
+ + + + +

+ keys() + +

+ + +
+ +

Get list of keys in the cache

+ +
+ Source code in molfeat/utils/cache.py +
758
+759
+760
def keys(self):
+    """Get list of keys in the cache"""
+    return list(itertools.chain(*(c.keys() for c in self.caches)))
+
+
+
+ +
+ + +
+ + + + +

+ to_dict() + +

+ + +
+ +

Convert current cache to a dictionary

+ +
+ Source code in molfeat/utils/cache.py +
770
+771
+772
def to_dict(self):
+    """Convert current cache to a dictionary"""
+    return dict(self.items())
+
+
+
+ +
+ + +
+ + + + +

+ values() + +

+ + +
+ +

Get list of values in the cache

+ +
+ Source code in molfeat/utils/cache.py +
762
+763
+764
def values(self):
+    """Get list of values in the cache"""
+    return list(itertools.chain(*(c.values() for c in self.caches)))
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ DataCache + + +

+ + +
+

+ Bases: _Cache

+ + +

Molecular features caching system that cache computed values in memory for reuse later

+ +
+ Source code in molfeat/utils/cache.py +
291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
+364
+365
+366
+367
+368
+369
+370
+371
+372
+373
+374
+375
+376
+377
+378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
class DataCache(_Cache):
+    """
+    Molecular features caching system that cache computed values in memory for reuse later
+    """
+
+    def __init__(
+        self,
+        name: str,
+        n_jobs: int = -1,
+        mol_hasher: Optional[Union[Callable, str, MolToKey]] = None,
+        verbose: Union[bool, int] = False,
+        cache_file: Optional[Union[os.PathLike, bool]] = None,
+        delete_on_exit: bool = False,
+        clear_on_exit: bool = True,
+    ):
+        """Precomputed fingerprint caching callback
+
+        Args:
+            name: name of the cache
+            n_jobs: number of parallel jobs to use when performing any computation
+            mol_hasher: function to use to hash molecules. If not provided, `dm.unique_id`` is used by default
+            verbose: whether to print progress. Default to False
+            cache_file: Cache location. Defaults to None, which will use in-memory caching.
+            delete_on_exit: Whether to delete the cache file on exit. Defaults to False.
+            clear_on_exit: Whether to clear the cache on exit of the interpreter. Default to True
+        """
+        super().__init__(name=name, mol_hasher=mol_hasher, n_jobs=n_jobs, verbose=verbose)
+
+        if cache_file is True:
+            cache_file = pathlib.Path(
+                platformdirs.user_cache_dir(appname="molfeat")
+            ) / "precomputed/{}_{}.db".format(self.name, str(uuid.uuid4())[:8])
+
+            cache_file = str(cache_file)
+        self.cache_file = cache_file
+        self.cache = {}
+        self._initialize_cache()
+        self.delete_on_exit = delete_on_exit
+        self.clear_on_exit = clear_on_exit
+        if self.clear_on_exit:
+            atexit.register(partial(self.clear, delete=delete_on_exit))
+
+    def _initialize_cache(self):
+        if self.cache_file not in [None, False]:
+            # force creation of cache directory
+            cache_parent = pathlib.Path(self.cache_file).parent
+            cache_parent.mkdir(parents=True, exist_ok=True)
+            self.cache = shelve.open(self.cache_file)
+        else:
+            self.cache = {}
+
+    def clear(self, delete: bool = False):
+        """Clear cache memory if needed.
+        Note that a cleared cache cannot be used anymore
+
+        Args:
+            delete: whether to delete the cache file if on disk
+        """
+        self.cache.clear()
+        if isinstance(self.cache, shelve.Shelf):
+            self.cache.close()
+            # EN: temporary set it to a dict before reopening
+            # this needs to be done to prevent operating on close files
+            self.cache = {}
+        if delete:
+            if self.cache_file is not None:
+                for path in glob.glob(str(self.cache_file) + "*"):
+                    try:
+                        os.unlink(path)
+                    except Exception:  # noqa
+                        pass
+        else:
+            self._initialize_cache()
+
+    def update(self, new_cache: Mapping[Any, Any]):
+        """Update the cache with new values
+
+        Args:
+            new_cache: new cache with items to use to update current cache
+        """
+        for k, v in new_cache.items():
+            k = self.mol_hasher(k)
+            self.cache[k] = v
+        return self
+
+    def _sync_cache(self):
+        """Perform a cache sync to ensure values are up to date"""
+        if isinstance(self.cache, shelve.Shelf):
+            self.cache.sync()
+
+    @classmethod
+    def load_from_file(cls, filepath: Union[os.PathLike, str]):
+        """Load a datache from a file (including remote file)
+
+        Args:
+            filepath: path to the file to load
+        """
+        cached_data = None
+        with fsspec.open(filepath, "rb") as f:
+            cached_data = joblib.load(f)
+        data = cached_data.pop("data", {})
+        new_cache = cls(**cached_data)
+        new_cache.update(data)
+        return new_cache
+
+    def save_to_file(self, filepath: Union[os.PathLike, str]):
+        """Save the cache to a file
+
+        Args:
+            filepath: path to the file to save
+        """
+        information = dict(
+            name=self.name,
+            n_jobs=self.n_jobs,
+            mol_hasher=self.mol_hasher,
+            verbose=self.verbose,
+            cache_file=(self.cache_file is not None),
+            delete_on_exit=self.delete_on_exit,
+        )
+        information["data"] = self.to_dict()
+        with fsspec.open(filepath, "wb") as f:
+            joblib.dump(information, f)
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __init__(name, n_jobs=-1, mol_hasher=None, verbose=False, cache_file=None, delete_on_exit=False, clear_on_exit=True) + +

+ + +
+ +

Precomputed fingerprint caching callback

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
name + str + +
+

name of the cache

+
+
+ required +
n_jobs + int + +
+

number of parallel jobs to use when performing any computation

+
+
+ -1 +
mol_hasher + Optional[Union[Callable, str, MolToKey]] + +
+

function to use to hash molecules. If not provided, `dm.unique_id` is used by default

+
+
+ None +
verbose + Union[bool, int] + +
+

whether to print progress. Default to False

+
+
+ False +
cache_file + Optional[Union[PathLike, bool]] + +
+

Cache location. Defaults to None, which will use in-memory caching.

+
+
+ None +
delete_on_exit + bool + +
+

Whether to delete the cache file on exit. Defaults to False.

+
+
+ False +
clear_on_exit + bool + +
+

Whether to clear the cache on exit of the interpreter. Default to True

+
+
+ True +
+ +
+ Source code in molfeat/utils/cache.py +
296
+297
+298
+299
+300
+301
+302
+303
+304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
+322
+323
+324
+325
+326
+327
+328
+329
+330
+331
def __init__(
+    self,
+    name: str,
+    n_jobs: int = -1,
+    mol_hasher: Optional[Union[Callable, str, MolToKey]] = None,
+    verbose: Union[bool, int] = False,
+    cache_file: Optional[Union[os.PathLike, bool]] = None,
+    delete_on_exit: bool = False,
+    clear_on_exit: bool = True,
+):
+    """Precomputed fingerprint caching callback
+
+    Args:
+        name: name of the cache
+        n_jobs: number of parallel jobs to use when performing any computation
+        mol_hasher: function to use to hash molecules. If not provided, `dm.unique_id`` is used by default
+        verbose: whether to print progress. Default to False
+        cache_file: Cache location. Defaults to None, which will use in-memory caching.
+        delete_on_exit: Whether to delete the cache file on exit. Defaults to False.
+        clear_on_exit: Whether to clear the cache on exit of the interpreter. Default to True
+    """
+    super().__init__(name=name, mol_hasher=mol_hasher, n_jobs=n_jobs, verbose=verbose)
+
+    if cache_file is True:
+        cache_file = pathlib.Path(
+            platformdirs.user_cache_dir(appname="molfeat")
+        ) / "precomputed/{}_{}.db".format(self.name, str(uuid.uuid4())[:8])
+
+        cache_file = str(cache_file)
+    self.cache_file = cache_file
+    self.cache = {}
+    self._initialize_cache()
+    self.delete_on_exit = delete_on_exit
+    self.clear_on_exit = clear_on_exit
+    if self.clear_on_exit:
+        atexit.register(partial(self.clear, delete=delete_on_exit))
+
+
+
+ +
+ + +
+ + + + +

+ clear(delete=False) + +

+ + +
+ +

Clear cache memory if needed. +Note that a cleared cache cannot be used anymore

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
delete + bool + +
+

whether to delete the cache file if on disk

+
+
+ False +
+ +
+ Source code in molfeat/utils/cache.py +
342
+343
+344
+345
+346
+347
+348
+349
+350
+351
+352
+353
+354
+355
+356
+357
+358
+359
+360
+361
+362
+363
def clear(self, delete: bool = False):
+    """Clear cache memory if needed.
+    Note that a cleared cache cannot be used anymore
+
+    Args:
+        delete: whether to delete the cache file if on disk
+    """
+    self.cache.clear()
+    if isinstance(self.cache, shelve.Shelf):
+        self.cache.close()
+        # EN: temporary set it to a dict before reopening
+        # this needs to be done to prevent operating on close files
+        self.cache = {}
+    if delete:
+        if self.cache_file is not None:
+            for path in glob.glob(str(self.cache_file) + "*"):
+                try:
+                    os.unlink(path)
+                except Exception:  # noqa
+                    pass
+    else:
+        self._initialize_cache()
+
+
+
+ +
+ + +
+ + + + +

+ load_from_file(filepath) + + + classmethod + + +

+ + +
+ +

Load a data cache from a file (including a remote file)

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
filepath + Union[PathLike, str] + +
+

path to the file to load

+
+
+ required +
+ +
+ Source code in molfeat/utils/cache.py +
381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
@classmethod
+def load_from_file(cls, filepath: Union[os.PathLike, str]):
+    """Load a datache from a file (including remote file)
+
+    Args:
+        filepath: path to the file to load
+    """
+    cached_data = None
+    with fsspec.open(filepath, "rb") as f:
+        cached_data = joblib.load(f)
+    data = cached_data.pop("data", {})
+    new_cache = cls(**cached_data)
+    new_cache.update(data)
+    return new_cache
+
+
+
+ +
+ + +
+ + + + +

+ save_to_file(filepath) + +

+ + +
+ +

Save the cache to a file

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
filepath + Union[PathLike, str] + +
+

path to the file to save

+
+
+ required +
+ +
+ Source code in molfeat/utils/cache.py +
396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
def save_to_file(self, filepath: Union[os.PathLike, str]):
+    """Save the cache to a file
+
+    Args:
+        filepath: path to the file to save
+    """
+    information = dict(
+        name=self.name,
+        n_jobs=self.n_jobs,
+        mol_hasher=self.mol_hasher,
+        verbose=self.verbose,
+        cache_file=(self.cache_file is not None),
+        delete_on_exit=self.delete_on_exit,
+    )
+    information["data"] = self.to_dict()
+    with fsspec.open(filepath, "wb") as f:
+        joblib.dump(information, f)
+
+
+
+ +
+ + +
+ + + + +

+ update(new_cache) + +

+ + +
+ +

Update the cache with new values

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
new_cache + Mapping[Any, Any] + +
+

new cache with items to use to update current cache

+
+
+ required +
+ +
+ Source code in molfeat/utils/cache.py +
365
+366
+367
+368
+369
+370
+371
+372
+373
+374
def update(self, new_cache: Mapping[Any, Any]):
+    """Update the cache with new values
+
+    Args:
+        new_cache: new cache with items to use to update current cache
+    """
+    for k, v in new_cache.items():
+        k = self.mol_hasher(k)
+        self.cache[k] = v
+    return self
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ FileCache + + +

+ + +
+

+ Bases: _Cache

+ + +

Read-only cache that holds precomputed data in a pickle, csv or h5py file.

+

The convention used requires the 'keys' and 'values' columns when +the input file needs to be loaded as a dataframe.

+ +
+ Source code in molfeat/utils/cache.py +
451
+452
+453
+454
+455
+456
+457
+458
+459
+460
+461
+462
+463
+464
+465
+466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
+480
+481
+482
+483
+484
+485
+486
+487
+488
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498
+499
+500
+501
+502
+503
+504
+505
+506
+507
+508
+509
+510
+511
+512
+513
+514
+515
+516
+517
+518
+519
+520
+521
+522
+523
+524
+525
+526
+527
+528
+529
+530
+531
+532
+533
+534
+535
+536
+537
+538
+539
+540
+541
+542
+543
+544
+545
+546
+547
+548
+549
+550
+551
+552
+553
+554
+555
+556
+557
+558
+559
+560
+561
+562
+563
+564
+565
+566
+567
+568
+569
+570
+571
+572
+573
+574
+575
+576
+577
+578
+579
+580
+581
+582
+583
+584
+585
+586
+587
+588
+589
+590
+591
+592
+593
+594
+595
+596
+597
+598
+599
+600
+601
+602
+603
+604
+605
+606
+607
+608
+609
+610
+611
+612
+613
+614
+615
+616
+617
+618
+619
+620
+621
+622
+623
+624
+625
+626
+627
+628
+629
+630
+631
+632
+633
+634
+635
+636
+637
+638
+639
+640
+641
+642
+643
+644
+645
+646
+647
+648
+649
+650
+651
+652
+653
+654
+655
+656
+657
+658
+659
+660
+661
+662
+663
+664
+665
+666
+667
+668
+669
+670
+671
+672
+673
+674
+675
+676
+677
+678
+679
+680
+681
+682
+683
+684
+685
+686
class FileCache(_Cache):
+    """
+    Read only cache that holds in precomputed data in a pickle, csv or h5py file.
+
+    The convention used requires the 'keys' and  'values' columns when
+    the input file needs to be loaded as a dataframe.
+    """
+
+    _PICKLE_PROTOCOL = 4
+    SUPPORTED_TYPES = ["pickle", "pkl", "csv", "parquet", "pq", "hdf5", "h5"]
+
+    def __init__(
+        self,
+        cache_file: Union[os.PathLike, str],
+        name: Optional[str] = None,
+        mol_hasher: Optional[Union[Callable, str, MolToKey]] = None,
+        n_jobs: Optional[int] = None,
+        verbose: Union[bool, int] = False,
+        file_type: str = "parquet",
+        clear_on_exit: bool = True,
+        parquet_kwargs: Optional[Dict[Any, Any]] = None,
+    ):
+        """Precomputed fingerprint caching callback
+
+        !!! note
+            Do not pickle this object, instead use the provided saving methods.
+
+        Args:
+            cache_file: Cache location. Can be a local file or a remote file
+            name: optional name to give the cache
+            mol_hasher: function to use to hash molecules. If not provided, `dm.unique_id` is used by default
+            n_jobs: number of parallel jobs to use when performing any computation
+            verbose: whether to print information about the cache
+            clear_on_exit: whether to clear the cache on exit of the interpreter
+            file_type: File type that was provided. One of "csv", "pickle", "hdf5" and "parquet"
+                For "csv" and "parquet", we expect columns "keys" and "values"
+                For a pickle, we expect either a mapping or a dataframe with "keys" and "values" columns
+            parquet_kwargs: Argument to pass to the parquet reader.
+        """
+        super().__init__(name=name, mol_hasher=mol_hasher, n_jobs=n_jobs, verbose=verbose)
+
+        self.cache_file = cache_file
+        self.file_type = file_type
+        self.parquet_kwargs = parquet_kwargs or {}
+        self.clear_on_exit = clear_on_exit
+
+        if self.file_type not in FileCache.SUPPORTED_TYPES:
+            raise ValueError(
+                f"Unsupported file type, expected one of {FileCache.SUPPORTED_TYPES}, got '{self.file_type}'"
+            )
+
+        if self.cache_file is not None and dm.fs.exists(self.cache_file):
+            self._load_cache()
+        else:
+            self.cache = {}
+
+        if self.clear_on_exit:
+            atexit.register(self.clear)
+
+    def clear(self):
+        """Clear cache memory at exit and close any open file
+        Note that a cleared cache cannot be used anymore !
+        """
+        if self.file_type in ["hdf5", "h5"]:
+            self.cache.close()
+        else:
+            del self.cache
+        # reset cache to empty
+        self.cache = {}
+
+    def items(self):
+        """Return iterator of key, values in the cache"""
+        if self.file_type in ["hdf5", "h5"]:
+            return ((k, np.asarray(v)) for k, v in self.cache.items())
+        return super().items()
+
+    def _load_cache(self):
+        """Load cache internally if needed"""
+
+        file_exists = dm.utils.fs.exists(self.cache_file)
+
+        if self.file_type in ["hdf5", "h5"]:
+            f = fsspec.open("simplecache::" + self.cache_file, "rb+").open()
+            self.cache = h5py.File(f, "r+")
+
+        elif not file_exists:
+            self.cache = {}
+
+        elif self.file_type in ["pickle", "pkl"]:
+            with fsspec.open(self.cache_file, "rb") as IN:
+                self.cache = joblib.load(IN)
+
+        elif self.file_type == "csv":
+            with fsspec.open(self.cache_file, "rb") as IN:
+                # Allow the CSV file to exist but with an empty content
+                try:
+                    self.cache = pd.read_csv(
+                        IN,
+                        converters={"values": lambda x: commons.unpack_bits(ast.literal_eval(x))},
+                    )
+                except pandas.errors.EmptyDataError:
+                    self.cache = {}
+
+        elif self.file_type in ["parquet", "pq"]:
+            self.cache = pd.read_parquet(
+                self.cache_file,
+                columns=["keys", "values"],
+                **self.parquet_kwargs,
+            )
+        # convert dataframe to dict if needed
+        if isinstance(self.cache, pd.DataFrame):
+            self.cache = self.cache.set_index("keys").to_dict()["values"]
+
+    def update(self, new_cache: Mapping[Any, Any]):
+        """Update the cache with new values
+
+        Args:
+            new_cache: new cache with items to use to update current cache
+        """
+        for k, v in new_cache.items():
+            key = self.mol_hasher(k)
+            if self.file_type in ["hdf5", "h5"]:
+                self.cache.create_dataset(key, data=v)
+            else:
+                self.cache[key] = v
+        return self
+
+    @classmethod
+    def load_from_file(cls, filepath: Union[os.PathLike, str], **kwargs):
+        """Load a FileCache from a file
+
+        Args:
+            filepath: path to the file to load
+            kwargs: keyword arguments to pass to the constructor
+        """
+        new_cache = cls(cache_file=filepath, **kwargs)
+        return new_cache
+
+    def to_dataframe(self, pack_bits: bool = False):
+        """Convert the cache to a dataframe. The converted dataframe would have `keys` and `values` columns
+
+        Args:
+            pack_bits: whether to pack the values columns into bits.
+                By using molfeat.utils.commons.unpack_bits, the values column can be reloaded as an array
+        """
+        if pack_bits:
+            loaded_items = [
+                (k, commons.pack_bits(x, protocol=self._PICKLE_PROTOCOL)) for k, x in self.items()
+            ]
+        else:
+            loaded_items = self.items()
+        df = pd.DataFrame(loaded_items, columns=["keys", "values"])
+        return df
+
+    def save_to_file(
+        self,
+        filepath: Optional[Union[os.PathLike, str]] = None,
+        file_type: Optional[str] = None,
+        **kwargs,
+    ):
+        """Save the cache to a file
+
+        Args:
+            filepath: path to the file to save. If None, the cache is saved to the original file.
+            file_type: format used to save the cache to file one of "pickle", "csv", "hdf5", "parquet".
+                If None, the original file type is used.
+            kwargs: keyword arguments to pass to the serializer to disk (e.g to pass to pd.to_csv or pd.to_parquet)
+        """
+
+        if filepath is None:
+            filepath = self.cache_file
+
+        if file_type is None:
+            file_type = self.file_type
+
+        if file_type in ["pkl", "pickle"]:
+            with fsspec.open(filepath, "wb") as f:
+                joblib.dump(self.to_dict(), f)
+
+        elif file_type in ["csv", "parquet", "pq"]:
+            df = self.to_dataframe(pack_bits=(file_type == "csv"))
+
+            if file_type == "csv":
+                with fsspec.open(filepath, "w") as f:
+                    df.to_csv(f, index=False, **kwargs)
+            else:
+                df.to_parquet(filepath, index=False, **kwargs)
+
+        elif file_type in ["hdf5", "h5"]:
+            with fsspec.open(filepath, "wb") as IN:
+                with h5py.File(IN, "w") as f:
+                    for k, v in self.items():
+                        f.create_dataset(k, data=v)
+        else:
+            raise ValueError("Unsupported output protocol: {}".format(file_type))
+
+    def to_state_dict(self, save_to_file: bool = True) -> dict:
+        """Serialize the cache to a state dict.
+
+        Args:
+            save_to_file: whether to save the cache to file.
+        """
+
+        if save_to_file is True:
+            self.save_to_file()
+
+        state = {}
+        state["_cache_name"] = "FileCache"
+        state["cache_file"] = self.cache_file
+        state["name"] = self.name
+        state["n_jobs"] = self.n_jobs
+        state["verbose"] = self.verbose
+        state["file_type"] = self.file_type
+        state["clear_on_exit"] = self.clear_on_exit
+        state["parquet_kwargs"] = self.parquet_kwargs
+        state["mol_hasher"] = self.mol_hasher.to_state_dict()
+
+        return state
+
+    @staticmethod
+    def from_state_dict(state: dict, override_args: Optional[dict] = None) -> "FileCache":
+        # Don't alter the original state dict
+        state = copy.deepcopy(state)
+
+        cache_name = state.pop("_cache_name")
+
+        if cache_name != "FileCache":
+            raise ValueError(f"The cache object name is invalid: {cache_name}")
+
+        # Load the MolToKey object
+        state["mol_hasher"] = MolToKey.from_state_dict(state["mol_hasher"])
+
+        if override_args is not None:
+            state.update(override_args)
+
+        return FileCache(**state)
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __init__(cache_file, name=None, mol_hasher=None, n_jobs=None, verbose=False, file_type='parquet', clear_on_exit=True, parquet_kwargs=None) + +

+ + +
+ +

Precomputed fingerprint caching callback

+
+

Note

+

Do not pickle this object, instead use the provided saving methods.

+
+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
cache_file + Union[PathLike, str] + +
+

Cache location. Can be a local file or a remote file

+
+
+ required +
name + Optional[str] + +
+

optional name to give the cache

+
+
+ None +
mol_hasher + Optional[Union[Callable, str, MolToKey]] + +
+

function to use to hash molecules. If not provided, dm.unique_id is used by default

+
+
+ None +
n_jobs + Optional[int] + +
+

number of parallel jobs to use when performing any computation

+
+
+ None +
verbose + Union[bool, int] + +
+

whether to print information about the cache

+
+
+ False +
clear_on_exit + bool + +
+

whether to clear the cache on exit of the interpreter

+
+
+ True +
file_type + str + +
+

File type that was provided. One of "csv", "pickle", "hdf5" and "parquet" +For "csv" and "parquet", we expect columns "keys" and "values" +For a pickle, we expect either a mapping or a dataframe with "keys" and "values" columns

+
+
+ 'parquet' +
parquet_kwargs + Optional[Dict[Any, Any]] + +
+

Argument to pass to the parquet reader.

+
+
+ None +
+ +
+ Source code in molfeat/utils/cache.py +
462
+463
+464
+465
+466
+467
+468
+469
+470
+471
+472
+473
+474
+475
+476
+477
+478
+479
+480
+481
+482
+483
+484
+485
+486
+487
+488
+489
+490
+491
+492
+493
+494
+495
+496
+497
+498
+499
+500
+501
+502
+503
+504
+505
+506
+507
+508
def __init__(
+    self,
+    cache_file: Union[os.PathLike, str],
+    name: Optional[str] = None,
+    mol_hasher: Optional[Union[Callable, str, MolToKey]] = None,
+    n_jobs: Optional[int] = None,
+    verbose: Union[bool, int] = False,
+    file_type: str = "parquet",
+    clear_on_exit: bool = True,
+    parquet_kwargs: Optional[Dict[Any, Any]] = None,
+):
+    """Precomputed fingerprint caching callback
+
+    !!! note
+        Do not pickle this object, instead use the provided saving methods.
+
+    Args:
+        cache_file: Cache location. Can be a local file or a remote file
+        name: optional name to give the cache
+        mol_hasher: function to use to hash molecules. If not provided, `dm.unique_id` is used by default
+        n_jobs: number of parallel jobs to use when performing any computation
+        verbose: whether to print information about the cache
+        clear_on_exit: whether to clear the cache on exit of the interpreter
+        file_type: File type that was provided. One of "csv", "pickle", "hdf5" and "parquet"
+            For "csv" and "parquet", we expect columns "keys" and "values"
+            For a pickle, we expect either a mapping or a dataframe with "keys" and "values" columns
+        parquet_kwargs: Argument to pass to the parquet reader.
+    """
+    super().__init__(name=name, mol_hasher=mol_hasher, n_jobs=n_jobs, verbose=verbose)
+
+    self.cache_file = cache_file
+    self.file_type = file_type
+    self.parquet_kwargs = parquet_kwargs or {}
+    self.clear_on_exit = clear_on_exit
+
+    if self.file_type not in FileCache.SUPPORTED_TYPES:
+        raise ValueError(
+            f"Unsupported file type, expected one of {FileCache.SUPPORTED_TYPES}, got '{self.file_type}'"
+        )
+
+    if self.cache_file is not None and dm.fs.exists(self.cache_file):
+        self._load_cache()
+    else:
+        self.cache = {}
+
+    if self.clear_on_exit:
+        atexit.register(self.clear)
+
+
+
+ +
+ + +
+ + + + +

+ clear() + +

+ + +
+ +

Clear cache memory at exit and close any open file +Note that a cleared cache cannot be used anymore !

+ +
+ Source code in molfeat/utils/cache.py +
510
+511
+512
+513
+514
+515
+516
+517
+518
+519
def clear(self):
+    """Clear cache memory at exit and close any open file
+    Note that a cleared cache cannot be used anymore !
+    """
+    if self.file_type in ["hdf5", "h5"]:
+        self.cache.close()
+    else:
+        del self.cache
+    # reset cache to empty
+    self.cache = {}
+
+
+
+ +
+ + +
+ + + + +

+ items() + +

+ + +
+ +

Return iterator of key, values in the cache

+ +
+ Source code in molfeat/utils/cache.py +
521
+522
+523
+524
+525
def items(self):
+    """Return iterator of key, values in the cache"""
+    if self.file_type in ["hdf5", "h5"]:
+        return ((k, np.asarray(v)) for k, v in self.cache.items())
+    return super().items()
+
+
+
+ +
+ + +
+ + + + +

+ load_from_file(filepath, **kwargs) + + + classmethod + + +

+ + +
+ +

Load a FileCache from a file

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
filepath + Union[PathLike, str] + +
+

path to the file to load

+
+
+ required +
kwargs + +
+

keyword arguments to pass to the constructor

+
+
+ {} +
+ +
+ Source code in molfeat/utils/cache.py +
578
+579
+580
+581
+582
+583
+584
+585
+586
+587
@classmethod
+def load_from_file(cls, filepath: Union[os.PathLike, str], **kwargs):
+    """Load a FileCache from a file
+
+    Args:
+        filepath: path to the file to load
+        kwargs: keyword arguments to pass to the constructor
+    """
+    new_cache = cls(cache_file=filepath, **kwargs)
+    return new_cache
+
+
+
+ +
+ + +
+ + + + +

+ save_to_file(filepath=None, file_type=None, **kwargs) + +

+ + +
+ +

Save the cache to a file

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
filepath + Optional[Union[PathLike, str]] + +
+

path to the file to save. If None, the cache is saved to the original file.

+
+
+ None +
file_type + Optional[str] + +
+

format used to save the cache to file one of "pickle", "csv", "hdf5", "parquet". +If None, the original file type is used.

+
+
+ None +
kwargs + +
+

keyword arguments to pass to the serializer to disk (e.g to pass to pd.to_csv or pd.to_parquet)

+
+
+ {} +
+ +
+ Source code in molfeat/utils/cache.py +
605
+606
+607
+608
+609
+610
+611
+612
+613
+614
+615
+616
+617
+618
+619
+620
+621
+622
+623
+624
+625
+626
+627
+628
+629
+630
+631
+632
+633
+634
+635
+636
+637
+638
+639
+640
+641
+642
+643
+644
+645
def save_to_file(
+    self,
+    filepath: Optional[Union[os.PathLike, str]] = None,
+    file_type: Optional[str] = None,
+    **kwargs,
+):
+    """Save the cache to a file
+
+    Args:
+        filepath: path to the file to save. If None, the cache is saved to the original file.
+        file_type: format used to save the cache to file one of "pickle", "csv", "hdf5", "parquet".
+            If None, the original file type is used.
+        kwargs: keyword arguments to pass to the serializer to disk (e.g to pass to pd.to_csv or pd.to_parquet)
+    """
+
+    if filepath is None:
+        filepath = self.cache_file
+
+    if file_type is None:
+        file_type = self.file_type
+
+    if file_type in ["pkl", "pickle"]:
+        with fsspec.open(filepath, "wb") as f:
+            joblib.dump(self.to_dict(), f)
+
+    elif file_type in ["csv", "parquet", "pq"]:
+        df = self.to_dataframe(pack_bits=(file_type == "csv"))
+
+        if file_type == "csv":
+            with fsspec.open(filepath, "w") as f:
+                df.to_csv(f, index=False, **kwargs)
+        else:
+            df.to_parquet(filepath, index=False, **kwargs)
+
+    elif file_type in ["hdf5", "h5"]:
+        with fsspec.open(filepath, "wb") as IN:
+            with h5py.File(IN, "w") as f:
+                for k, v in self.items():
+                    f.create_dataset(k, data=v)
+    else:
+        raise ValueError("Unsupported output protocol: {}".format(file_type))
+
+
+
+ +
+ + +
+ + + + +

+ to_dataframe(pack_bits=False) + +

+ + +
+ +

Convert the cache to a dataframe. The converted dataframe would have keys and values columns

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
pack_bits + bool + +
+

whether to pack the values columns into bits. +By using molfeat.utils.commons.unpack_bits, the values column can be reloaded as an array

+
+
+ False +
+ +
+ Source code in molfeat/utils/cache.py +
589
+590
+591
+592
+593
+594
+595
+596
+597
+598
+599
+600
+601
+602
+603
def to_dataframe(self, pack_bits: bool = False):
+    """Convert the cache to a dataframe. The converted dataframe would have `keys` and `values` columns
+
+    Args:
+        pack_bits: whether to pack the values columns into bits.
+            By using molfeat.utils.commons.unpack_bits, the values column can be reloaded as an array
+    """
+    if pack_bits:
+        loaded_items = [
+            (k, commons.pack_bits(x, protocol=self._PICKLE_PROTOCOL)) for k, x in self.items()
+        ]
+    else:
+        loaded_items = self.items()
+    df = pd.DataFrame(loaded_items, columns=["keys", "values"])
+    return df
+
+
+
+ +
+ + +
+ + + + +

+ to_state_dict(save_to_file=True) + +

+ + +
+ +

Serialize the cache to a state dict.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
save_to_file + bool + +
+

whether to save the cache to file.

+
+
+ True +
+ +
+ Source code in molfeat/utils/cache.py +
647
+648
+649
+650
+651
+652
+653
+654
+655
+656
+657
+658
+659
+660
+661
+662
+663
+664
+665
+666
+667
+668
def to_state_dict(self, save_to_file: bool = True) -> dict:
+    """Serialize the cache to a state dict.
+
+    Args:
+        save_to_file: whether to save the cache to file.
+    """
+
+    if save_to_file is True:
+        self.save_to_file()
+
+    state = {}
+    state["_cache_name"] = "FileCache"
+    state["cache_file"] = self.cache_file
+    state["name"] = self.name
+    state["n_jobs"] = self.n_jobs
+    state["verbose"] = self.verbose
+    state["file_type"] = self.file_type
+    state["clear_on_exit"] = self.clear_on_exit
+    state["parquet_kwargs"] = self.parquet_kwargs
+    state["mol_hasher"] = self.mol_hasher.to_state_dict()
+
+    return state
+
+
+
+ +
+ + +
+ + + + +

+ update(new_cache) + +

+ + +
+ +

Update the cache with new values

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
new_cache + Mapping[Any, Any] + +
+

new cache with items to use to update current cache

+
+
+ required +
+ +
+ Source code in molfeat/utils/cache.py +
564
+565
+566
+567
+568
+569
+570
+571
+572
+573
+574
+575
+576
def update(self, new_cache: Mapping[Any, Any]):
+    """Update the cache with new values
+
+    Args:
+        new_cache: new cache with items to use to update current cache
+    """
+    for k, v in new_cache.items():
+        key = self.mol_hasher(k)
+        if self.file_type in ["hdf5", "h5"]:
+            self.cache.create_dataset(key, data=v)
+        else:
+            self.cache[key] = v
+    return self
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ MPDataCache + + +

+ + +
+

+ Bases: DataCache

+ + +

A datacache that supports multiprocessing natively

+ +
+ Source code in molfeat/utils/cache.py +
415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
+444
+445
+446
+447
+448
class MPDataCache(DataCache):
+    """A datacache that supports multiprocessing natively"""
+
+    def __init__(
+        self,
+        name: Optional[str] = None,
+        n_jobs: int = -1,
+        mol_hasher: Optional[Union[Callable, str, MolToKey]] = None,
+        verbose: Union[bool, int] = False,
+        clear_on_exit: bool = False,
+    ):
+        """Multiprocessing datacache that save cache into a shared memory
+
+        Args:
+            name: name of the cache
+            n_jobs: number of parallel jobs to use when performing any computation
+            mol_hasher: function to use to hash molecules. If not provided, `dm.unique_id` is used by default
+            verbose: whether to print progress. Default to False
+            clear_on_exit: Whether to clear the cache on exit. Default is False to allow sharing the cache content
+        """
+        super().__init__(
+            name=name,
+            n_jobs=n_jobs,
+            mol_hasher=mol_hasher,
+            cache_file=None,
+            verbose=verbose,
+            delete_on_exit=False,
+            clear_on_exit=clear_on_exit,
+        )
+
+    def _initialize_cache(self):
+        """Initialize empty cache using a shared dict"""
+        manager = mp.Manager()  # this might not be a great idea to initialize everytime...
+        self.cache = manager.dict()
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __init__(name=None, n_jobs=-1, mol_hasher=None, verbose=False, clear_on_exit=False) + +

+ + +
+ +

Multiprocessing datacache that saves the cache into shared memory

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
name + Optional[str] + +
+

name of the cache

+
+
+ None +
n_jobs + int + +
+

number of parallel jobs to use when performing any computation

+
+
+ -1 +
mol_hasher + Optional[Union[Callable, str, MolToKey]] + +
+

function to use to hash molecules. If not provided, `dm.unique_id` is used by default

+
+
+ None +
verbose + Union[bool, int] + +
+

whether to print progress. Default to False

+
+
+ False +
clear_on_exit + bool + +
+

Whether to clear the cache on exit. Default is False to allow sharing the cache content

+
+
+ False +
+ +
+ Source code in molfeat/utils/cache.py +
418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
+436
+437
+438
+439
+440
+441
+442
+443
def __init__(
+    self,
+    name: Optional[str] = None,
+    n_jobs: int = -1,
+    mol_hasher: Optional[Union[Callable, str, MolToKey]] = None,
+    verbose: Union[bool, int] = False,
+    clear_on_exit: bool = False,
+):
+    """Multiprocessing datacache that save cache into a shared memory
+
+    Args:
+        name: name of the cache
+        n_jobs: number of parallel jobs to use when performing any computation
+        mol_hasher: function to use to hash molecules. If not provided, `dm.unique_id` is used by default
+        verbose: whether to print progress. Default to False
+        clear_on_exit: Whether to clear the cache on exit. Default is False to allow sharing the cache content
+    """
+    super().__init__(
+        name=name,
+        n_jobs=n_jobs,
+        mol_hasher=mol_hasher,
+        cache_file=None,
+        verbose=verbose,
+        delete_on_exit=False,
+        clear_on_exit=clear_on_exit,
+    )
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ MolToKey + + +

+ + +
+ + +

Convert a molecule to a key

+ +
+ Source code in molfeat/utils/cache.py +
35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
+85
+86
+87
+88
+89
+90
+91
+92
+93
+94
+95
+96
class MolToKey:
+    """Convert a molecule to a key"""
+
+    SUPPORTED_HASH_FN = {
+        "dm.unique_id": dm.unique_id,
+        "dm.to_inchikey": dm.to_inchikey,
+    }
+
+    def __init__(self, hash_fn: Optional[Union[Callable, str]] = "dm.unique_id"):
+        """Init function for molecular key generator.
+
+        Args:
+            hash_fn: hash function to use for the molecular key
+        """
+
+        if isinstance(hash_fn, str):
+            if hash_fn not in self.SUPPORTED_HASH_FN:
+                raise ValueError(
+                    f"Hash function {hash_fn} is not supported. "
+                    f"Supported hash functions are: {self.SUPPORTED_HASH_FN.keys()}"
+                )
+
+            self.hash_name = hash_fn
+            self.hash_fn = self.SUPPORTED_HASH_FN[hash_fn]
+
+        else:
+            self.hash_fn = hash_fn
+            self.hash_name = None
+
+            if self.hash_fn is None:
+                self.hash_fn = dm.unique_id
+                self.hash_name = "dm.unique_id"
+
+    def __call__(self, mol: dm.Mol):
+        """Convert a molecule object to a key that can be used for the cache system
+
+        Args:
+            mol: input molecule object
+        """
+        with dm.without_rdkit_log():
+            is_mol = dm.to_mol(mol) is not None
+            if is_mol and self.hash_fn is not None:
+                return self.hash_fn(mol)
+        return mol
+
+    def to_state_dict(self):
+        """Serialize MolToKey to a state dict."""
+
+        if self.hash_name is None:
+            raise ValueError(
+                "The hash function has been provided as a function and not a string. "
+                "So it's impossible to save the state. You must specifiy the hash function as a string instead."
+            )
+
+        state = {}
+        state["hash_name"] = self.hash_name
+        return state
+
+    @staticmethod
+    def from_state_dict(state: dict) -> "MolToKey":
+        """Load a MolToKey object from a state dict."""
+        return MolToKey(hash_fn=state["hash_name"])
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __call__(mol) + +

+ + +
+ +

Convert a molecule object to a key that can be used for the cache system

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mol + Mol + +
+

input molecule object

+
+
+ required +
+ +
+ Source code in molfeat/utils/cache.py +
68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
def __call__(self, mol: dm.Mol):
+    """Convert a molecule object to a key that can be used for the cache system
+
+    Args:
+        mol: input molecule object
+    """
+    with dm.without_rdkit_log():
+        is_mol = dm.to_mol(mol) is not None
+        if is_mol and self.hash_fn is not None:
+            return self.hash_fn(mol)
+    return mol
+
+
+
+ +
+ + +
+ + + + +

+ __init__(hash_fn='dm.unique_id') + +

+ + +
+ +

Init function for molecular key generator.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
hash_fn + Optional[Union[Callable, str]] + +
+

hash function to use for the molecular key

+
+
+ 'dm.unique_id' +
+ +
+ Source code in molfeat/utils/cache.py +
43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
def __init__(self, hash_fn: Optional[Union[Callable, str]] = "dm.unique_id"):
+    """Init function for molecular key generator.
+
+    Args:
+        hash_fn: hash function to use for the molecular key
+    """
+
+    if isinstance(hash_fn, str):
+        if hash_fn not in self.SUPPORTED_HASH_FN:
+            raise ValueError(
+                f"Hash function {hash_fn} is not supported. "
+                f"Supported hash functions are: {self.SUPPORTED_HASH_FN.keys()}"
+            )
+
+        self.hash_name = hash_fn
+        self.hash_fn = self.SUPPORTED_HASH_FN[hash_fn]
+
+    else:
+        self.hash_fn = hash_fn
+        self.hash_name = None
+
+        if self.hash_fn is None:
+            self.hash_fn = dm.unique_id
+            self.hash_name = "dm.unique_id"
+
+
+
+ +
+ + +
+ + + + +

+ from_state_dict(state) + + + staticmethod + + +

+ + +
+ +

Load a MolToKey object from a state dict.

+ +
+ Source code in molfeat/utils/cache.py +
93
+94
+95
+96
@staticmethod
+def from_state_dict(state: dict) -> "MolToKey":
+    """Load a MolToKey object from a state dict."""
+    return MolToKey(hash_fn=state["hash_name"])
+
+
+
+ +
+ + +
+ + + + +

+ to_state_dict() + +

+ + +
+ +

Serialize MolToKey to a state dict.

+ +
+ Source code in molfeat/utils/cache.py +
80
+81
+82
+83
+84
+85
+86
+87
+88
+89
+90
+91
def to_state_dict(self):
+    """Serialize MolToKey to a state dict."""
+
+    if self.hash_name is None:
+        raise ValueError(
+            "The hash function has been provided as a function and not a string. "
+            "So it's impossible to save the state. You must specifiy the hash function as a string instead."
+        )
+
+    state = {}
+    state["hash_name"] = self.hash_name
+    return state
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +

+

Common utils

+ + +
+ + + + +
+ +

Common utility functions

+ + + +
+ + + + + + + + + + +
+ + + + +

+ align_conformers(mols, ref_id=0, copy=True, conformer_id=-1) + +

+ + +
+ +

Align a list of molecules to a reference molecule.

+

Note: consider adding me to datamol.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + List[Mol] + +
+

List of molecules to align. All the molecules must have a conformer.

+
+
+ required +
ref_id + int + +
+

Index of the reference molecule. By default, the first molecule in the list +will be used as reference.

+
+
+ 0 +
copy + bool + +
+

Whether to copy the molecules before performing the alignment.

+
+
+ True +
conformer_id + int + +
+

Conformer id to use.

+
+
+ -1 +
+ + + +

Returns:

+ + + + + + + + + + + + + + + + + +
Name TypeDescription
mols + +
+

The aligned molecules.

+
+
scores + +
+

The score of the alignment.

+
+
+ +
+ Source code in molfeat/utils/commons.py +
378
+379
+380
+381
+382
+383
+384
+385
+386
+387
+388
+389
+390
+391
+392
+393
+394
+395
+396
+397
+398
+399
+400
+401
+402
+403
+404
+405
+406
+407
+408
+409
+410
+411
+412
+413
+414
+415
+416
+417
+418
+419
+420
+421
+422
+423
+424
+425
+426
+427
+428
+429
+430
+431
+432
+433
+434
+435
def align_conformers(
+    mols: List[dm.Mol],
+    ref_id: int = 0,
+    copy: bool = True,
+    conformer_id: int = -1,
+):
+    """Align a list of molecules to a reference molecule.
+
+    Note: consider adding me to `datamol`.
+
+    Args:
+        mols: List of molecules to align. All the molecules must have a conformer.
+        ref_id: Index of the reference molecule. By default, the first molecule in the list
+            will be used as reference.
+        copy: Whether to copy the molecules before performing the alignment.
+        conformer_id: Conformer id to use.
+
+    Returns:
+        mols: The aligned molecules.
+        scores: The score of the alignment.
+    """
+
+    # Check all input molecules has a conformer
+    if not all([mol.GetNumConformers() >= 1 for mol in mols]):
+        raise ValueError("One or more input molecules is missing a conformer.")
+
+    # Make a copy of the molecules since they are going to be modified
+    if copy:
+        mols = [dm.copy_mol(mol) for mol in mols]
+
+    # Compute Crippen contributions for every atoms and molecules
+    crippen_contribs = [rdMolDescriptors._CalcCrippenContribs(mol) for mol in mols]
+
+    # Split reference and probe molecules
+    crippen_contrib_ref = crippen_contribs[ref_id]
+    crippen_contrib_probes = crippen_contribs
+    mol_ref = mols[ref_id]
+    mol_probes = mols
+
+    # Loop and align
+    scores = []
+    for i, mol in enumerate(mol_probes):
+        crippenO3A = rdMolAlign.GetCrippenO3A(
+            prbMol=mol,
+            refMol=mol_ref,
+            prbCrippenContribs=crippen_contrib_probes[i],
+            refCrippenContribs=crippen_contrib_ref,
+            prbCid=conformer_id,
+            refCid=conformer_id,
+            maxIters=50,
+        )
+        crippenO3A.Align()
+
+        scores.append(crippenO3A.Score())
+
+    scores = np.array(scores)
+
+    return mols, scores
+
+
+
+ +
+ + +
+ + + + +

+ concat_dict(prop_dict, new_name, order=None) + +

+ + +
+ +

Concat properties in dict into a single key dict

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
prop_dict + dict + +
+

Input dict of property names and their computed values

+
+
+ required +
new_name + str + +
+

new name under which the concatenated property dict will be returned

+
+
+ required +
order + Optional[Iterable[str]] + +
+

Optional list of keys that specifies the order in which concatenation should be done. By default, the keys are sorted.

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
dict + +
+

dictionary of concatenated output values with a single key corresponding to new_name

+
+
+ +
+ Source code in molfeat/utils/commons.py +
304
+305
+306
+307
+308
+309
+310
+311
+312
+313
+314
+315
+316
+317
+318
+319
+320
+321
def concat_dict(prop_dict: dict, new_name: str, order: Optional[Iterable[str]] = None):
+    """Concat properties in dict into a single key dict
+
+    Args:
+        prop_dict (dict): Input dict of property names and their computed values
+        new_name (str): new name under which the concatenated property dict will be returned
+        order: Optional list of keys that specifies the order in which concatenation should be done. By default, the keys are sorted.
+
+    Returns:
+        dict: dictionary of concatenated output values with a single key corresponding to new_name
+    """
+    if not order:
+        order = list(sorted(prop_dict.keys()))
+
+    if len(order) > 0:
+        concatenated_val = np.concatenate([prop_dict[x] for x in order], axis=1)
+        output_dict = {new_name: concatenated_val}
+    return output_dict
+
+
+
+ +
+ + +
+ + + + +

+ ensure_picklable(fn) + +

+ + +
+ +

Ensure a function is picklable

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
fn + Callable + +
+

function to be pickled

+
+
+ required +
+ +
+ Source code in molfeat/utils/commons.py +
86
+87
+88
+89
+90
+91
+92
+93
+94
def ensure_picklable(fn: Callable):
+    """Ensure a function is picklable
+
+    Args:
+        fn: function to be pickled
+    """
+    if inspect.isfunction(fn) and fn.__name__ == "<lambda>":
+        return wrap_non_picklable_objects(fn)
+    return fn
+
+
+
+ +
+ + +
+ + + + +

+ filter_arguments(fn, params) + +

+ + +
+ +

Filter the argument of a function to only retain the valid ones

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
fn + Callable + +
+

Function for which arguments will be checked

+
+
+ required +
params + dict + +
+

key-val dictionary of arguments to pass to the input function

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
params_filtered + dict + +
+

dict of filtered arguments for the function

+
+
+ +
+ Source code in molfeat/utils/commons.py +
179
+180
+181
+182
+183
+184
+185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
def filter_arguments(fn: Callable, params: dict):
+    """Filter the argument of a function to only retain the valid ones
+
+    Args:
+        fn: Function for which arguments will be checked
+        params: key-val dictionary of arguments to pass to the input function
+
+    Returns:
+        params_filtered (dict): dict of filtered arguments for the function
+    """
+    accepted_dict = inspect.signature(fn).parameters
+    accepted_list = []
+    for key in accepted_dict.keys():
+        param = str(accepted_dict[key])
+        if param[0] != "*":
+            accepted_list.append(param)
+    params_filtered = {key: params[key] for key in list(set(accepted_list) & set(params.keys()))}
+    return params_filtered
+
+
+
+ +
+ + +
+ + + + +

+ fn_to_hex(fn) + +

+ + +
+ +

Pickle an object and return its hex representation

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
fn + +
+

object to pickle

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
str + +
+

hex representation of object

+
+
+ +
+ Source code in molfeat/utils/commons.py +
 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
def fn_to_hex(fn):
+    """Pickle an object and return its hex representation
+
+    Args:
+        fn: object to pickle
+
+    Returns:
+        str: hex representation of object
+    """
+    bytes_str = pickle.dumps(ensure_picklable(fn))
+    return bytes_str.hex()
+
+
+
+ +
+ + +
+ + + + +

+ fold_count_fp(fp, dim=2 ** 10, binary=False) + +

+ + +
+ +

Fast folding of a count fingerprint to the specified dimension

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
fp + Iterable + +
+

iterable fingerprint

+
+
+ required +
dim + int + +
+

dimension of the folded array. Defaults to 2**10.

+
+
+ 2 ** 10 +
binary + bool + +
+

whether to fold into a binary array or use a count vector

+
+
+ False +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
folded + +
+

returns folded array to the provided dimension

+
+
+ +
+ Source code in molfeat/utils/commons.py +
199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
+224
+225
+226
+227
+228
+229
+230
+231
def fold_count_fp(fp: Iterable, dim: int = 2**10, binary: bool = False):
+    """Fast folding of a count fingerprint to the specified dimension
+
+    Args:
+        fp: iterable fingerprint
+        dim: dimension of the folded array. Defaults to 2**10.
+        binary: whether to fold into a binary array or use a count vector
+
+    Returns:
+        folded: returns folded array to the provided dimension
+    """
+    if hasattr(fp, "GetNonzeroElements"):
+        tmp = fp.GetNonzeroElements()
+    elif hasattr(fp, "GetOnBits"):
+        # try to get the dict of onbit
+        on_bits = fp.GetOnBits()
+        tmp = dict(zip(on_bits, np.ones(len(on_bits))))
+    else:
+        raise ValueError(f"Format {type(fp)} is not supported")
+    out = (
+        coo_matrix(
+            (
+                list(tmp.values()),
+                (np.repeat(0, len(tmp)), [i % dim for i in tmp.keys()]),
+            ),
+            shape=(1, dim),
+        )
+        .toarray()
+        .flatten()
+    )
+    if binary:
+        out = np.clip(out, a_min=0, a_max=1)
+    return out
+
+
+
+ +
+ + +
+ + + + +

+ get_class_name(cls) + +

+ + +
+ +

Get class full name

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
cls + Type + +
+

class whose full name is returned

+
+
+ required +
+ +
+ Source code in molfeat/utils/commons.py +
57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
def get_class_name(cls: Type):
+    """Get class full name
+
+    Args:
+        cls: class whose full name is returned
+    """
+    module = cls.__module__
+    name = cls.__qualname__
+    if module is not None and module != "__builtin__":
+        name = module + "." + name
+    return name
+
+
+
+ +
+ + +
+ + + + +

+ hex_to_fn(hex) + +

+ + +
+ +

Load a hex string as a callable. Raise error on fail

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
hex + str + +
+

hex string to load as a callable

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
callable + +
+

callable loaded from the hex string

+
+
+ +
+ Source code in molfeat/utils/commons.py +
110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
def hex_to_fn(hex: str):
+    """Load a hex string as a callable. Raise error on fail
+
+    Args:
+        hex: hex string to load as a callable
+
+    Returns:
+        callable: callable loaded from the hex string
+    """
+    # EN: pickling with pickle is probably faster
+    fn = pickle.loads(bytes.fromhex(hex))
+    return fn
+
+
+
+ +
+ + +
+ + + + +

+ is_callable(func) + +

+ + +
+ +

Check if func is a function or a callable

+ +
+ Source code in molfeat/utils/commons.py +
32
+33
+34
+35
+36
def is_callable(func):
+    r"""
+    Check if func is a function or a callable
+    """
+    return func and (isinstance(func, FUNCTYPES) or callable(func))
+
+
+
+ +
+ + +
+ + + + +

+ one_hot_encoding(val, allowable_set, encode_unknown=False, dtype=int) + +

+ + +
+ +

Converts a single value to a one-hot vector.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
val + int + +
+

class to be converted into a one hot vector

+
+
+ required +
allowable_set + Iterable + +
+

a list or 1D array of allowed choices for val to take

+
+
+ required +
dtype + Callable + +
+

data type of the returned array. Default = int.

+
+
+ int +
encode_unknown + bool + +
+

whether to map inputs not in allowable set to an additional last element.

+
+
+ False +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

A numpy 1D array of length len(allowable_set), or len(allowable_set) + 1 when encode_unknown is True

+
+
+ +
+ Source code in molfeat/utils/commons.py +
124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
def one_hot_encoding(
+    val: int,
+    allowable_set: Iterable,
+    encode_unknown: bool = False,
+    dtype: Callable = int,
+):
+    r"""Converts a single value to a one-hot vector.
+
+    Args:
+        val: class to be converted into a one hot vector
+        allowable_set: a list or 1D array of allowed choices for val to take
+        dtype: data type of the returned array. Default = int.
+        encode_unknown: whether to map inputs not in allowable set to an additional last element.
+
+    Returns:
+        A numpy 1D array of length len(allowable_set), or len(allowable_set) + 1 when encode_unknown is True
+    """
+
+    encoding = np.zeros(len(allowable_set) + int(encode_unknown), dtype=dtype)
+    # not using list.index here, in case there are
+    # duplicates in the allowed choices
+    for i, v in enumerate(allowable_set):
+        if v == val:
+            encoding[i] = 1
+    if np.sum(encoding) == 0 and encode_unknown:  # aka not found
+        encoding[-1] = 1
+    return encoding
+
+
+
+ +
+ + +
+ + + + +

+ pack_bits(obj, protocol=4) + +

+ + +
+ +

Pack an object into a bits representation

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
obj + +
+

object to pack

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
bytes + +
+

byte-packed version of object

+
+
+ +
+ Source code in molfeat/utils/commons.py +
354
+355
+356
+357
+358
+359
+360
+361
+362
+363
def pack_bits(obj, protocol=4):
+    """Pack an object into a bits representation
+
+    Args:
+        obj: object to pack
+
+    Returns:
+        bytes: byte-packed version of object
+    """
+    return pickle.dumps(obj, protocol=protocol)
+
+
+
+ +
+ + +
+ + + + +

+ pack_graph(batch_G, batch_x) + +

+ + +
+ +

Pack a batch of graph and atom features into a single graph

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
batch_G + List[FloatTensor] + +
+

List of adjacency graph, each of size (n_i, n_i).

+
+
+ required +
batch_x + List[FloatTensor] + +
+

List of atom feature matrices, each of size (n_i, F), F being the number of features

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

new_batch_G, new_batch_x: torch.LongTensor 2D, torch.Tensor 2D +This tuple represents a new arbitrary graph that contains the whole batch, +and the corresponding atom feature matrix. new_batch_G has a size (N, N), with :math:N = \sum_i n_i, +while new_batch_x has size (N, F)

+
+
+ +
+ Source code in molfeat/utils/commons.py +
324
+325
+326
+327
+328
+329
+330
+331
+332
+333
+334
+335
+336
+337
+338
+339
+340
+341
+342
+343
+344
+345
+346
+347
+348
+349
+350
+351
def pack_graph(
+    batch_G: List[torch.FloatTensor],
+    batch_x: List[torch.FloatTensor],
+):
+    r"""
+    Pack a batch of graph and atom features into a single graph
+
+    Args:
+        batch_G: List of adjacency graph, each of size (n_i, n_i).
+        batch_x: List of atom feature matrices, each of size (n_i, F), F being the number of features
+
+    Returns:
+        new_batch_G, new_batch_x: torch.LongTensor 2D, torch.Tensor 2D
+            This tuple represents a new arbitrary graph that contains the whole batch,
+            and the corresponding atom feature matrix. new_batch_G has a size (N, N), with :math:`N = \sum_i n_i`,
+            while new_batch_x has size (N, F)
+    """
+
+    new_batch_x = torch.cat(tuple(batch_x), dim=0)
+    n_neigb = new_batch_x.shape[0]
+    # should be on the same device
+    new_batch_G = batch_G[0].new_zeros((n_neigb, n_neigb))
+    cur_ind = 0
+    for g in batch_G:
+        g_size = g.shape[0] + cur_ind
+        new_batch_G[cur_ind:g_size, cur_ind:g_size] = g
+        cur_ind = g_size
+    return new_batch_G, new_batch_x
+
+
+
+ +
+ + +
+ + + + +

+ requires_conformer(calculator) + +

+ + +
+ +

Decorator for any descriptor calculator that requires conformers

+ +
+ Source code in molfeat/utils/commons.py +
234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
def requires_conformer(calculator: Callable):
+    """Decorator for any descriptor calculator that requires conformers"""
+
+    # this is a method or __call__
+    if inspect.getfullargspec(calculator).args[0] == "self":
+
+        @functools.wraps(calculator)
+        def calculator_wrapper(ref, mol, *args, **kwargs):
+            mol = dm.to_mol(mol)
+            if mol.GetNumConformers() < 1:
+                raise ValueError("Expected a molecule with conformers information.")
+            return calculator(ref, mol, *args, **kwargs)
+
+    else:
+
+        @functools.wraps(calculator)
+        def calculator_wrapper(mol, *args, **kwargs):
+            mol = dm.to_mol(mol)
+            if mol.GetNumConformers() < 1:
+                raise ValueError("Expected a molecule with conformers information.")
+            return calculator(mol, *args, **kwargs)
+
+    return calculator_wrapper
+
+
+
+ +
+ + +
+ + + + +

+ requires_standardization(calculator=None, *, disconnect_metals=True, remove_salt=True, **standardize_kwargs) + +

+ + +
+ +

Decorator for any descriptor calculator that requires standardization of the molecules +Args: + calculator: calculator to wrap + disconnect_metals: whether to force metal disconnection + remove_salt: whether to remove salt from the molecule

+ +
+ Source code in molfeat/utils/commons.py +
259
+260
+261
+262
+263
+264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
+287
+288
+289
+290
+291
+292
+293
+294
+295
+296
+297
+298
+299
+300
+301
def requires_standardization(
+    calculator: Callable = None,
+    *,
+    disconnect_metals: bool = True,
+    remove_salt: bool = True,
+    **standardize_kwargs,
+):
+    """Decorator for any descriptor calculator that required standardization of the molecules
+    Args:
+        calculator: calculator to wrap
+        disconnect_metals: whether to force metal disconnection
+        remove_salt: whether to remove salt from the molecule
+    """
+
+    def _standardize_mol(calculator):
+        @functools.wraps(calculator)
+        def wrapped_function(mol, *args, **kwargs):
+            mol = _clean_mol_for_descriptors(
+                mol,
+                disconnect_metals=disconnect_metals,
+                remove_salt=remove_salt,
+                **standardize_kwargs,
+            )
+            return calculator(mol, *args, **kwargs)
+
+        @functools.wraps(calculator)
+        def class_wrapped_function(ref, mol, *args, **kwargs):
+            if not getattr(ref, "do_not_standardize", False):
+                mol = _clean_mol_for_descriptors(
+                    mol,
+                    disconnect_metals=disconnect_metals,
+                    remove_salt=remove_salt,
+                    **standardize_kwargs,
+                )
+            return calculator(ref, mol, *args, **kwargs)
+
+        if inspect.getfullargspec(calculator).args[0] == "self":
+            return class_wrapped_function
+        return wrapped_function
+
+    if calculator is not None:
+        return _standardize_mol(calculator)
+    return _standardize_mol
+
+
+
+ +
+ + +
+ + + + +

+ sha256sum(filepath) + +

+ + +
+ +

Return the sha256 sum hash of a file or a directory

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
filepath + Union[str, PathLike] + +
+

The path to the file or directory to compute the SHA-256 hash on.

+
+
+ required +
+ +
+ Source code in molfeat/utils/commons.py +
39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
def sha256sum(filepath: Union[str, os.PathLike]):
+    """Return the sha256 sum hash of a file or a directory
+
+    Args:
+        filepath: The path to the file or directory to compute the SHA-256 hash on.
+    """
+    if dm.fs.is_dir(filepath):
+        files = list(dm.fs.glob(os.path.join(filepath, "**", "*")))
+    else:
+        files = [filepath]
+    file_hash = hashlib.sha256()
+    for filepath in sorted(files):
+        with fsspec.open(filepath) as f:
+            file_hash.update(f.read())  # type: ignore
+    file_hash = file_hash.hexdigest()
+    return file_hash
+
+
+
+ +
+ + +
+ + + + +

+ unpack_bits(bvalues) + +

+ + +
+ +

Unpack a bits representation back into the original object

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
bvalues + +
+

bytes to be unpacked

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
obj + +
+

object that was packed

+
+
+ +
+ Source code in molfeat/utils/commons.py +
366
+367
+368
+369
+370
+371
+372
+373
+374
+375
def unpack_bits(bvalues):
+    """Unpack a bits representation back into the original object
+
+    Args:
+        bvalues: bytes to be unpacked
+
+    Returns:
+        obj: object that was packed
+    """
+    return pickle.loads(bvalues)
+
+
+
+ +
+ + + +
+ +
+ +

+

Require module

+ + +
+ + + + +
+ + + +
+ + + + + + + + + + +
+ + + + +

+ check(module, min_version=None, max_version=None) + + + cached + + +

+ + +
+ +

Check if module is available for import

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
module + str + +
+

name of the module to check

+
+
+ required +
min_version + Optional[str] + +
+

optional minimum version string to check

+
+
+ None +
max_version + Optional[str] + +
+

optional maximum version string to check

+
+
+ None +
+ +
+ Source code in molfeat/utils/requires.py +
 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
@functools.lru_cache()
+def check(module: str, min_version: Optional[str] = None, max_version: Optional[str] = None):
+    """Check if module is available for import
+
+    Args:
+        module: name of the module to check
+        min_version: optional minimum version string to check
+        max_version: optional maximum version string to check
+    """
+    imported_module = None
+    version = None
+    min_version = pkg_version.parse(min_version) if min_version is not None else None
+    max_version = pkg_version.parse(max_version) if max_version is not None else None
+    try:
+        imported_module = importlib.import_module(module)
+        version = getattr(imported_module, "__version__", None)
+    except ImportError:
+        return False
+    if version is not None:
+        try:
+            version = pkg_version.parse(version)
+        except pkg_version.InvalidVersion:
+            # EN: packaging v22 removed LegacyVersion which has consequences
+            version = None
+    return version is None or (
+        (min_version is None or version >= min_version)
+        and (max_version is None or version <= max_version)
+    )
+
+
+
+ +
+ + +
+ + + + +

+ mock(name) + +

+ + +
+ +

Mock a function to raise an error

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
name + str + +
+

name of the module or function to mock

+
+
+ required +
+ +
+ Source code in molfeat/utils/requires.py +
38
+39
+40
+41
+42
+43
+44
+45
def mock(name: str):
+    """Mock a function to raise an error
+
+    Args:
+        name: name of the module or function to mock
+
+    """
+    return lambda: (_ for _ in ()).throw(Exception(f"{name} is not available"))
+
+
+
+ +
+ + + +
+ +
+ +

+

Datatype Conversion

+ + +
+ + + + +
+ + + +
+ + + + + + + + + + +
+ + + + +

+ as_numpy_array_if_possible(arr, dtype) + +

+ + +
+ +

Convert an input array to a numpy datatype if possible +Args: + arr: input array + dtype: optional numpy datatype

+ +
+ Source code in molfeat/utils/datatype.py +
264
+265
+266
+267
+268
+269
+270
+271
+272
+273
+274
+275
+276
+277
+278
+279
+280
+281
+282
+283
+284
+285
+286
def as_numpy_array_if_possible(arr, dtype: Optional[None]):
+    """Convert an input array to a numpy datatype if possible
+    Args:
+        arr: input array
+        dtype: optional numpy datatype
+    """
+    with suppress(Exception):
+        # we only consider auto casting to numpy
+        # when the user requests 'a numpy datatype'.
+        if (dtype is not None and is_dtype_numpy(dtype)) or (
+            dtype in [pd.DataFrame, "dataframe", "pandas", "df"]
+        ):
+            # skip any non compatible type
+            # meaning it should be a list of list or a list of numpy array or a 2D numpy array.
+            if (
+                isinstance(arr, (list, np.ndarray))
+                and isinstance(arr[0], (np.ndarray, list))
+                and np.isscalar(arr[0][0])
+            ):
+                return sk_utils.check_array(
+                    arr, accept_sparse=True, force_all_finite=False, ensure_2d=False, allow_nd=True
+                )
+    return arr
+
+
+
+ +
+ + +
+ + + + +

+ cast(fp, dtype=None, columns=None) + +

+ + +
+ +

Change the datatype of a list of input arrays

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
fp + array + +
+

Input array to cast (2D)

+
+
+ required +
dtype + Optional[Callable] + +
+

datatype to cast to

+
+
+ None +
columns + Optional[Iterable] + +
+

column names for pandas dataframe

+
+
+ None +
+ +
+ Source code in molfeat/utils/datatype.py +
223
+224
+225
+226
+227
+228
+229
+230
+231
+232
+233
+234
+235
+236
+237
+238
+239
+240
+241
+242
+243
+244
+245
+246
+247
+248
+249
+250
+251
+252
+253
+254
+255
+256
+257
+258
+259
+260
+261
def cast(fp, dtype: Optional[Callable] = None, columns: Optional[Iterable] = None):
+    """Change the datatype of a list of input array
+
+    Args:
+        fp (array): Input array to cast (2D)
+        dtype: datatype to cast to
+        columns: column names for pandas dataframe
+    """
+    if fp is None or dtype is None:
+        return fp
+    if isinstance(fp, dict):
+        fp = {k: cast(v, dtype=dtype, columns=columns) for k, v in fp.items()}
+    elif dtype in [tuple, list]:
+        fp = list(fp)
+    elif is_dtype_numpy(dtype):
+        if isinstance(fp, (list, tuple)) and not np.isscalar(fp[0]):
+            fp = [to_numpy(fp_i, dtype=dtype) for fp_i in fp]
+            fp = to_numpy(fp, dtype=dtype)
+        else:
+            fp = to_numpy(fp, dtype=dtype)
+    elif is_dtype_tensor(dtype):
+        if isinstance(fp, (list, tuple)) and not np.isscalar(fp[0]):
+            tmp_fp = to_numpy(fp[0])
+            if len(tmp_fp.shape) > 1:
+                fp = torch.cat([to_tensor(fp_i, dtype=dtype) for fp_i in fp])
+            else:
+                fp = torch.stack([to_tensor(fp_i, dtype=dtype) for fp_i in fp])
+        else:
+            fp = to_tensor(fp, dtype=dtype)
+    elif dtype in [pd.DataFrame, "dataframe", "pandas", "df"]:
+        fp = [feat if feat is not None else [] for feat in fp]
+        fp = pd.DataFrame(fp)
+        if columns is not None:
+            fp.columns = columns
+    elif is_dtype_bitvect(dtype):
+        fp = [to_fp(feat, sparse=(dtype == SparseBitVect)) for feat in fp]
+    else:
+        raise TypeError("The type {} is not supported".format(dtype))
+    return fp
+
+
+
+ +
+ + +
+ + + + +

+ ensure_explicit(x) + +

+ + +
+ +

Ensure that the input vector is not a sparse bit vector

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
x + Union[SparseBitVect, ExplicitBitVect] + +
+

input vector

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
converted + +
+

ExplicitBitVect if input is SparseBitVect, else input as is

+
+
+ +
+ Source code in molfeat/utils/datatype.py +
20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
def ensure_explicit(x: Union[SparseBitVect, ExplicitBitVect]):
+    """Ensure that the input vector is not a sparse bit vector
+
+    Args:
+        x: input vector
+
+    Returns:
+        converted: ExplicitBitVect if input is SparseBitVect, else input as is
+    """
+    if isinstance(x, SparseBitVect):
+        x = ConvertToExplicit(x)
+    return x
+
+
+
+ +
+ + +
+ + + + +

+ is_dtype_bitvect(dtype) + +

+ + +
+ +

Verify if the dtype is a bitvect type

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
dtype + callable + +
+

The dtype of a value. E.g. np.int32, str, torch.float

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

A boolean saying if the dtype is a bitvect type

+
+
+ +
+ Source code in molfeat/utils/datatype.py +
170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
def is_dtype_bitvect(dtype):
+    """
+    Verify if the dtype is a bitvect type
+
+    Args:
+        dtype (callable): The dtype of a value. E.g. np.int32, str, torch.float
+
+    Returns:
+        A boolean saying if the dtype is a bitvect type
+    """
+    return dtype in [ExplicitBitVect, SparseBitVect] or isinstance(
+        dtype, (ExplicitBitVect, SparseBitVect)
+    )
+
+
+
+ +
+ + +
+ + + + +

+ is_dtype_numpy(dtype) + +

+ + +
+ +

Verify if the dtype is a numpy dtype

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
dtype + callable + +
+

The dtype of a value. E.g. np.int32, str, torch.float

+
+
+ required +
+

Returns + A boolean saying if the dtype is a numpy dtype

+ +
+ Source code in molfeat/utils/datatype.py +
185
+186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
def is_dtype_numpy(dtype):
+    r"""
+    Verify if the dtype is a numpy dtype
+
+    Args:
+        dtype (callable): The dtype of a value. E.g. np.int32, str, torch.float
+    Returns:
+        A boolean saying if the dtype is a numpy dtype
+    """
+    # special case where user provides a type
+    if isinstance(dtype, str):
+        with suppress(Exception):
+            dtype = np.dtype(dtype).type
+    is_torch = is_dtype_tensor(dtype)
+    is_num = dtype in (int, float, complex)
+    if hasattr(dtype, "__module__"):
+        is_numpy = dtype.__module__ == "numpy"
+    else:
+        is_numpy = False
+    return (is_num or is_numpy) and not is_torch
+
+
+
+ +
+ + +
+ + + + +

+ is_dtype_tensor(dtype) + +

+ + +
+ +

Verify if the dtype is a torch dtype

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
dtype + callable + +
+

The dtype of a value. E.g. np.int32, str, torch.float

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

A boolean saying if the dtype is a torch dtype

+
+
+ +
+ Source code in molfeat/utils/datatype.py +
157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
def is_dtype_tensor(dtype):
+    r"""
+    Verify if the dtype is a torch dtype
+
+    Args:
+        dtype (callable): The dtype of a value. E.g. np.int32, str, torch.float
+
+    Returns:
+        A boolean saying if the dtype is a torch dtype
+    """
+    return isinstance(dtype, torch.dtype) or (dtype == torch.Tensor)
+
+
+
+ +
+ + +
+ + + + +

+ is_null(obj) + +

+ + +
+ +

Check if an obj is null (nan, None or array of nan)

+ +
+ Source code in molfeat/utils/datatype.py +
207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
def is_null(obj):
+    """Check if an obj is null (nan, None or array of nan)"""
+    array_nan = False
+    all_none = False
+    try:
+        tmp = to_numpy(obj)
+        array_nan = np.all(np.isnan(tmp))
+    except Exception:
+        pass
+    try:
+        all_none = all(x is None for x in obj)
+    except Exception:
+        pass
+    return obj is None or all_none or array_nan
+
+
+
+ +
+ + +
+ + + + +

+ to_fp(arr, bitvect=True, sparse=False) + +

+ + +
+ +

Convert numpy array to fingerprint

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
arr + ndarray + +
+

Numpy array to convert to bitvec

+
+
+ required +
bitvect + bool + +
+

whether to assume the data is a bitvect or intvect

+
+
+ True +
sparse + bool + +
+

whether to convert to sparse bit vect

+
+
+ False +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
fp + +
+

RDKit bit vector

+
+
+ +
+ Source code in molfeat/utils/datatype.py +
129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
def to_fp(arr: np.ndarray, bitvect: bool = True, sparse: bool = False):
+    """Convert numpy array to fingerprint
+
+    Args:
+        arr: Numpy array to convert to bitvec
+        bitvect: whether to assume the data is a bitvect or intvect
+        sparse: whether to convert to sparse bit vect
+
+    Returns:
+        fp: RDKit bit vector
+    """
+    if not isinstance(arr, list) and arr.ndim > 1:
+        raise ValueError("Expect a 1D array as input !")
+    if not bitvect:
+        fp = UIntSparseIntVect(len(arr))
+        for ix, value in enumerate(arr):
+            fp[ix] = int(value)
+    elif sparse:
+        onbits = np.where(arr == 1)[0].tolist()
+        fp = SparseBitVect(arr.shape[0])
+        fp.SetBitsFromList(onbits)
+    else:
+        arr = np.asarray(arr)
+        bitstring = "".join(arr.astype(str))
+        fp = CreateFromBitString(bitstring)
+    return fp
+
+
+
+ +
+ + +
+ + + + +

+ to_numpy(x, copy=False, dtype=None) + +

+ + +
+ +

Convert a tensor to numpy array.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
x + Object + +
+

The Python object to convert.

+
+
+ required +
copy + bool + +
+

Whether to copy the memory. +By default, if a tensor is already on CPU, the +Numpy array will be a view of the tensor.

+
+
+ False +
dtype + callable + +
+

Optional type to cast the values to

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + + + + + +
TypeDescription
+ +
+

A new Python object with the same structure as x but where the tensors are now Numpy

+
+
+ +
+

arrays. Unsupported types are left as references in the new object.

+
+
+ +
+ Source code in molfeat/utils/datatype.py +
 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
def to_numpy(x, copy=False, dtype=None):
+    r"""
+    Convert a tensor to numpy array.
+
+    Args:
+        x (Object): The Python object to convert.
+        copy (bool, optional): Whether to copy the memory.
+            By default, if a tensor is already on CPU, the
+            Numpy array will be a view of the tensor.
+        dtype (callable, optional): Optional type to cast the values to
+
+    Returns:
+        A new Python object with the same structure as `x` but where the tensors are now Numpy
+        arrays. Unsupported types are left as references in the new object.
+    """
+    if isinstance(x, (list, tuple, np.ndarray)) and torch.is_tensor(x[0]):
+        x = [to_numpy(xi, copy=copy, dtype=dtype) for xi in x]
+    if isinstance(x, np.ndarray):
+        pass
+    elif torch.is_tensor(x):
+        x = x.cpu().detach().numpy()
+        x = x.copy()
+    elif isinstance(x, SparseBitVect):
+        tmp = np.zeros(x.GetNumBits(), dtype=int)
+        for n_bit in list(x.GetOnBits()):
+            tmp[n_bit] = 1
+        x = tmp
+    elif isinstance(x, ExplicitBitVect):
+        x = dm.fp_to_array(x)
+    elif hasattr(x, "GetNonzeroElements"):
+        # one of the other rdkit type
+        tmp = np.zeros(x.GetLength())
+        bit_idx, values = np.array(list(x.GetNonzeroElements().items())).T
+        tmp[bit_idx] = values
+        x = tmp
+    else:
+        x = np.asarray(x)
+    if dtype is not None:
+        x = x.astype(dtype)
+    return x
+
+
+
+ +
+ + +
+ + + + +

+ to_sparse(x, dtype=None) + +

+ + +
+ +

Converts dense tensor x to sparse format

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
x + Tensor + +
+

tensor to convert

+
+
+ required +
dtype + dtype + +
+

Enforces new data type for the output. +If None, it keeps the same datatype as x (Default: None)

+
+
+ None +
+

Returns: + new torch.sparse Tensor

+ +
+ Source code in molfeat/utils/datatype.py +
34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
def to_sparse(x, dtype=None):
+    r"""
+    Converts dense tensor x to sparse format
+
+    Args:
+        x (torch.Tensor): tensor to convert
+        dtype (torch.dtype, optional): Enforces new data type for the output.
+            If None, it keeps the same datatype as x (Default: None)
+    Returns:
+        new torch.sparse Tensor
+    """
+
+    if dtype is not None:
+        x = x.type(dtype)
+
+    x_typename = torch.typename(x).split(".")[-1]
+    sparse_tensortype = getattr(torch.sparse, x_typename)
+
+    indices = torch.nonzero(x)
+    if len(indices.shape) == 0:  # if all elements are zeros
+        return sparse_tensortype(*x.shape)
+    indices = indices.t()
+    values = x[tuple(indices[i] for i in range(indices.shape[0]))]
+    return sparse_tensortype(indices, values, x.size())
+
+
+
+ +
+ + +
+ + + + +

+ to_tensor(x, gpu=False, dtype=None) + +

+ + +
+ +

Convert a numpy array to tensor. The tensor type will be +the same as the original array, unless specified otherwise

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
x + ndarray + +
+

Numpy array to convert to tensor type

+
+
+ required +
gpu + bool optional + +
+

Whether to move tensor to gpu. Default False

+
+
+ False +
dtype + dtype + +
+

Enforces new data type for the output

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

New torch.Tensor

+
+
+ +
+ Source code in molfeat/utils/datatype.py +
60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
+79
+80
+81
+82
+83
+84
def to_tensor(x, gpu=False, dtype=None):
+    r"""
+    Convert a numpy array to tensor. The tensor type will be
+    the same as the original array, unless specified otherwise
+
+    Args:
+        x (numpy.ndarray): Numpy array to convert to tensor type
+        gpu (bool, optional): Whether to move tensor to gpu. Default False
+        dtype (torch.dtype, optional): Enforces new data type for the output
+
+    Returns:
+        New torch.Tensor
+    """
+    if not torch.is_tensor(x):
+        try:
+            if torch.is_tensor(x[0]):
+                x = torch.stack(x)
+        except Exception:
+            pass
+        x = torch.as_tensor(x)
+    if dtype is not None:
+        x = x.to(dtype=dtype)
+    if gpu and torch.cuda.is_available():
+        x = x.cuda()
+    return x
+
+
+
+ +
+ + + +
+ +
+ +

+

Pooling

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ BartPooler + + +

+ + +
+

+ Bases: Module

+ + +

Default Bart pooler as implemented in huggingface transformers +The Bart pooling function focuses on the eos token ([EOS]) to get a sentence representation.

+ +
+ Source code in molfeat/utils/pooler.py +
157
+158
+159
+160
+161
+162
+163
+164
+165
+166
+167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
class BartPooler(nn.Module):
+    """
+    Default Bart pooler as implemented in huggingface transformers
+    The Bart pooling function focuses on the eos token ([EOS]) to get a sentence representation.
+    """
+
+    def __init__(self, config, **kwargs):
+        super().__init__()
+        self.config = config
+
+    def forward(
+        self, h: torch.Tensor, inputs: Optional[torch.Tensor] = None, **kwargs
+    ) -> torch.Tensor:
+        """Forward pass of the pooling layer
+
+        Args:
+            h: hidden representation of the input sequence to pool over
+            inputs: inputs tokens to the bart underlying model
+
+        Returns:
+            pooled_output: pooled representation of the input sequence
+        """
+        eos_mask = inputs.eq(self.config.get("eos_token_id"))
+        if len(torch.unique_consecutive(eos_mask.sum(1))) > 1:
+            raise ValueError("All examples must have the same number of <eos> tokens.")
+        pooled_output = h[eos_mask, :].view(h.size(0), -1, h.size(-1))[:, -1, :]
+        return pooled_output
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ forward(h, inputs=None, **kwargs) + +

+ + +
+ +

Forward pass of the pooling layer

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
h + Tensor + +
+

hidden representation of the input sequence to pool over

+
+
+ required +
inputs + Optional[Tensor] + +
+

inputs tokens to the bart underlying model

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
pooled_output + Tensor + +
+

pooled representation of the input sequence

+
+
+ +
+ Source code in molfeat/utils/pooler.py +
167
+168
+169
+170
+171
+172
+173
+174
+175
+176
+177
+178
+179
+180
+181
+182
+183
def forward(
+    self, h: torch.Tensor, inputs: Optional[torch.Tensor] = None, **kwargs
+) -> torch.Tensor:
+    """Forward pass of the pooling layer
+
+    Args:
+        h: hidden representation of the input sequence to pool over
+        inputs: inputs tokens to the bart underlying model
+
+    Returns:
+        pooled_output: pooled representation of the input sequence
+    """
+    eos_mask = inputs.eq(self.config.get("eos_token_id"))
+    if len(torch.unique_consecutive(eos_mask.sum(1))) > 1:
+        raise ValueError("All examples must have the same number of <eos> tokens.")
+    pooled_output = h[eos_mask, :].view(h.size(0), -1, h.size(-1))[:, -1, :]
+    return pooled_output
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ BertPooler + + +

+ + +
+

+ Bases: Module

+ + +

Default Bert pooler as implemented in huggingface transformers +The bert pooling function focuses on a projection of the first token ([CLS]) to get a sentence representation.

+ +
+ Source code in molfeat/utils/pooler.py +
115
+116
+117
+118
+119
+120
+121
+122
+123
+124
+125
+126
+127
+128
+129
+130
+131
+132
+133
+134
+135
+136
+137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
class BertPooler(nn.Module):
+    """
+    Default Bert pooler as implemented in huggingface transformers
+    The bert pooling function focuses on a projection of the first token ([CLS]) to get a sentence representation.
+    """
+
+    def __init__(
+        self,
+        config,
+        activation: Optional[Callable] = None,
+        random_seed: int = None,
+        **kwargs,
+    ):
+        super().__init__()
+        self.config = config
+        self.random_seed = random_seed
+        if self.random_seed is not None:
+            torch.manual_seed(self.random_seed)
+        hidden_size = config.get("hidden_size")
+        self.dense = nn.Linear(hidden_size, hidden_size)
+        self.activation = nn.Tanh() if activation is None else activation
+
+    def forward(
+        self, h: torch.Tensor, inputs: Optional[torch.Tensor] = None, **kwargs
+    ) -> torch.Tensor:
+        """Forward pass of the pooling layer
+
+        Args:
+            h: hidden representation of the input sequence to pool over
+            inputs: optional input that has been provided to the underlying bert model
+
+        Returns:
+            pooled_output: pooled representation of the input sequence
+        """
+        # We "pool" the model by simply taking the hidden state corresponding
+        # to the first token.
+        first_token_tensor = h[:, 0]
+        pooled_output = self.dense(first_token_tensor)
+        pooled_output = self.activation(pooled_output)
+        return pooled_output
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ forward(h, inputs=None, **kwargs) + +

+ + +
+ +

Forward pass of the pooling layer

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
h + Tensor + +
+

hidden representation of the input sequence to pool over

+
+
+ required +
inputs + Optional[Tensor] + +
+

optional input that has been provided to the underlying bert model

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
pooled_output + Tensor + +
+

pooled representation of the input sequence

+
+
+ +
+ Source code in molfeat/utils/pooler.py +
137
+138
+139
+140
+141
+142
+143
+144
+145
+146
+147
+148
+149
+150
+151
+152
+153
+154
def forward(
+    self, h: torch.Tensor, inputs: Optional[torch.Tensor] = None, **kwargs
+) -> torch.Tensor:
+    """Forward pass of the pooling layer
+
+    Args:
+        h: hidden representation of the input sequence to pool over
+        inputs: optional input that has been provided to the underlying bert model
+
+    Returns:
+        pooled_output: pooled representation of the input sequence
+    """
+    # We "pool" the model by simply taking the hidden state corresponding
+    # to the first token.
+    first_token_tensor = h[:, 0]
+    pooled_output = self.dense(first_token_tensor)
+    pooled_output = self.activation(pooled_output)
+    return pooled_output
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ GPTPooler + + +

+ + +
+

+ Bases: Module

+ + +

Default GPT pooler as implemented in huggingface transformers +The GPT pooling function focuses on the last non-padding token given sequence length to get a sentence representation.

+ +
+ Source code in molfeat/utils/pooler.py +
186
+187
+188
+189
+190
+191
+192
+193
+194
+195
+196
+197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
class GPTPooler(nn.Module):
+    """
+    Default GPT pooler as implemented in huggingface transformers
+    The GPT pooling function focuses on the last non-padding token given sequence length to get a sentence representation.
+    """
+
+    def __init__(self, config, **kwargs):
+        super().__init__()
+        self.config = config
+        self.pad_token_id = config.get("pad_token_id")
+
+    def forward(
+        self, h: torch.Tensor, inputs: Optional[torch.Tensor] = None, **kwargs
+    ) -> torch.Tensor:
+        """Forward pass of the pooling layer
+
+        Args:
+            h: hidden representation of the input sequence to pool over
+            inputs: input tokens provided to the underlying GPT model
+
+        Returns:
+            pooled_output: pooled representation of the input sequence
+        """
+        batch_size, sequence_lengths = inputs.shape[:2]
+
+        assert (
+            self.pad_token_id is not None or batch_size == 1
+        ), "Cannot handle batch sizes > 1 if no padding token is defined."
+        if self.pad_token_id is None:
+            sequence_lengths = -1
+            logger.warning(
+                f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+                f"unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+            )
+        else:
+            sequence_lengths = torch.ne(inputs, self.pad_token_id).sum(-1) - 1
+        pooled_output = h[torch.arange(batch_size), sequence_lengths]
+        return pooled_output
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ forward(h, inputs=None, **kwargs) + +

+ + +
+ +

Forward pass of the pooling layer

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
h + Tensor + +
+

hidden representation of the input sequence to pool over

+
+
+ required +
inputs + Optional[Tensor] + +
+

input tokens provided to the underlying GPT model

+
+
+ None +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
pooled_output + Tensor + +
+

pooled representation of the input sequence

+
+
+ +
+ Source code in molfeat/utils/pooler.py +
197
+198
+199
+200
+201
+202
+203
+204
+205
+206
+207
+208
+209
+210
+211
+212
+213
+214
+215
+216
+217
+218
+219
+220
+221
+222
+223
def forward(
+    self, h: torch.Tensor, inputs: Optional[torch.Tensor] = None, **kwargs
+) -> torch.Tensor:
+    """Forward pass of the pooling layer
+
+    Args:
+        h: hidden representation of the input sequence to pool over
+        inputs: input tokens provided to the underlying GPT model
+
+    Returns:
+        pooled_output: pooled representation of the input sequence
+    """
+    batch_size, sequence_lengths = inputs.shape[:2]
+
+    assert (
+        self.pad_token_id is not None or batch_size == 1
+    ), "Cannot handle batch sizes > 1 if no padding token is defined."
+    if self.pad_token_id is None:
+        sequence_lengths = -1
+        logger.warning(
+            f"{self.__class__.__name__} will not detect padding tokens in `inputs_embeds`. Results may be "
+            f"unexpected if using padding tokens in conjunction with `inputs_embeds.`"
+        )
+    else:
+        sequence_lengths = torch.ne(inputs, self.pad_token_id).sum(-1) - 1
+    pooled_output = h[torch.arange(batch_size), sequence_lengths]
+    return pooled_output
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ HFPooler + + +

+ + +
+

+ Bases: Module

+ + +

Default Pooler based on Molfeat Pooling layer

+ +
+ Source code in molfeat/utils/pooler.py +
 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
class HFPooler(nn.Module):
+    """Default Pooler based on Molfeat Pooling layer"""
+
+    def __init__(self, config, dim: int = 1, name: str = "mean", **kwargs):
+        super().__init__()
+        self.config = config
+        self.pooling = Pooling(dim=dim, name=name)
+
+    def forward(
+        self,
+        h: torch.Tensor,
+        inputs: Optional[torch.Tensor] = None,
+        mask: Optional[torch.Tensor] = None,
+        ignore_padding: bool = True,
+        **kwargs,
+    ) -> torch.Tensor:
+        """Forward pass of the pooling layer
+
+        Args:
+            h: hidden representation of the input sequence to pool over
+            inputs: optional input that has been provided to the underlying bert model
+            mask: optional mask to use in place of computing the padding specific mask
+            ignore_padding: whether to ignore padding tokens when pooling
+
+        Returns:
+            pooled_output: pooled representation of the input sequence
+        """
+        if mask is None and ignore_padding:
+            mask = inputs.ne(self.config.get("pad_token_id"))
+        if mask.ndim == 2:
+            mask = mask.unsqueeze(-1)  # B, S, 1
+        return self.pooling(h, indices=None, mask=mask)
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ forward(h, inputs=None, mask=None, ignore_padding=True, **kwargs) + +

+ + +
+ +

Forward pass of the pooling layer

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
h + Tensor + +
+

hidden representation of the input sequence to pool over

+
+
+ required +
inputs + Optional[Tensor] + +
+

optional input that has been provided to the underlying bert model

+
+
+ None +
mask + Optional[Tensor] + +
+

optional mask to use in place of computing the padding specific mask

+
+
+ None +
ignore_padding + bool + +
+

whether to ignore padding tokens when pooling

+
+
+ True +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
pooled_output + Tensor + +
+

pooled representation of the input sequence

+
+
+ +
+ Source code in molfeat/utils/pooler.py +
 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
def forward(
+    self,
+    h: torch.Tensor,
+    inputs: Optional[torch.Tensor] = None,
+    mask: Optional[torch.Tensor] = None,
+    ignore_padding: bool = True,
+    **kwargs,
+) -> torch.Tensor:
+    """Forward pass of the pooling layer
+
+    Args:
+        h: hidden representation of the input sequence to pool over
+        inputs: optional input that has been provided to the underlying bert model
+        mask: optional mask to use in place of computing the padding specific mask
+        ignore_padding: whether to ignore padding tokens when pooling
+
+    Returns:
+        pooled_output: pooled representation of the input sequence
+    """
+    if mask is None and ignore_padding:
+        mask = inputs.ne(self.config.get("pad_token_id"))
+    if mask.ndim == 2:
+        mask = mask.unsqueeze(-1)  # B, S, 1
+    return self.pooling(h, indices=None, mask=mask)
+
+
+
+ +
+ + + +
+ +
+ +
+ +
+ + + + +

+ Pooling + + +

+ + +
+

+ Bases: Module

+ + +

Perform simple pooling on a tensor over one dimension

+ +
+ Source code in molfeat/utils/pooler.py +
33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
class Pooling(nn.Module):
+    """
+    Perform simple pooling on a tensor over one dimension
+    """
+
+    SUPPORTED_POOLING = ["mean", "avg", "max", "sum", "clf", None]
+
+    def __init__(self, dim: int = 1, name: str = "max"):
+        """
+        Pooling for embeddings
+
+        Args:
+            dim: dimension to pool over, default is 1
+            name: pooling type. Default is 'mean'.
+        """
+        super().__init__()
+        self.dim = dim
+        self.name = name
+
+    def forward(self, x, indices: List[int] = None, mask: torch.Tensor = None) -> torch.Tensor:
+        """Perform a pooling operation on the input tensor
+
+        Args:
+            x: input tensor to pull over
+            indices: Subset of indices to pool over. Defaults to None for all indices.
+            mask: binary mask to apply when pooling. Defaults to None, which is a matrix of 1.
+                If mask is provided it takes precedence over indices.
+        """
+        x = torch.as_tensor(x)
+        if mask is None:
+            mask = torch.ones_like(x)
+        if indices is not None:
+            mask[:, indices] = 0
+        neg_inf = torch.finfo(x.dtype).min
+        if mask.ndim == 2:
+            mask = mask.unsqueeze(-1)  # B, S, 1
+        if self.name == "clf":
+            return x[:, 0, :]
+        if self.name == "max":
+            tmp = x.masked_fill(mask, neg_inf)
+            return torch.max(tmp, dim=self.dim)[0]
+        elif self.name in ["mean", "avg"]:
+            return torch.sum(x * mask, dim=self.dim) / mask.sum(self.dim)
+        elif self.name == "sum":
+            return torch.sum(x * mask, dim=self.dim)
+        return x
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __init__(dim=1, name='max') + +

+ + +
+ +

Pooling for embeddings

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
dim + int + +
+

dimension to pool over, default is 1

+
+
+ 1 +
name + str + +
+

pooling type. Default is 'mean'.

+
+
+ 'max' +
+ +
+ Source code in molfeat/utils/pooler.py +
40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
def __init__(self, dim: int = 1, name: str = "max"):
+    """
+    Pooling for embeddings
+
+    Args:
+        dim: dimension to pool over, default is 1
+        name: pooling type. Default is 'mean'.
+    """
+    super().__init__()
+    self.dim = dim
+    self.name = name
+
+
+
+ +
+ + +
+ + + + +

+ forward(x, indices=None, mask=None) + +

+ + +
+ +

Perform a pooling operation on the input tensor

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
x + +
+

input tensor to pull over

+
+
+ required +
indices + List[int] + +
+

Subset of indices to pool over. Defaults to None for all indices.

+
+
+ None +
mask + Tensor + +
+

binary mask to apply when pooling. Defaults to None, which is a matrix of 1. +If mask is provided it takes precedence over indices.

+
+
+ None +
+ +
+ Source code in molfeat/utils/pooler.py +
52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
+66
+67
+68
+69
+70
+71
+72
+73
+74
+75
+76
+77
+78
def forward(self, x, indices: List[int] = None, mask: torch.Tensor = None) -> torch.Tensor:
+    """Perform a pooling operation on the input tensor
+
+    Args:
+        x: input tensor to pull over
+        indices: Subset of indices to pool over. Defaults to None for all indices.
+        mask: binary mask to apply when pooling. Defaults to None, which is a matrix of 1.
+            If mask is provided it takes precedence over indices.
+    """
+    x = torch.as_tensor(x)
+    if mask is None:
+        mask = torch.ones_like(x)
+    if indices is not None:
+        mask[:, indices] = 0
+    neg_inf = torch.finfo(x.dtype).min
+    if mask.ndim == 2:
+        mask = mask.unsqueeze(-1)  # B, S, 1
+    if self.name == "clf":
+        return x[:, 0, :]
+    if self.name == "max":
+        tmp = x.masked_fill(mask, neg_inf)
+        return torch.max(tmp, dim=self.dim)[0]
+    elif self.name in ["mean", "avg"]:
+        return torch.sum(x * mask, dim=self.dim) / mask.sum(self.dim)
+    elif self.name == "sum":
+        return torch.sum(x * mask, dim=self.dim)
+    return x
+
+
+
+ +
+ + + +
+ +
+ +
+ + + +
+ + + + +

+ get_default_hgf_pooler(name, config, **kwargs) + +

+ + +
+ +

Get default HuggingFace pooler based on the model name +Args: + name: name of the model + config: config of the model + kwargs: additional arguments to pass to the pooler

+ +
+ Source code in molfeat/utils/pooler.py +
11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
def get_default_hgf_pooler(name, config, **kwargs):
+    """Get default HuggingFace pooler based on the model name
+    Args:
+        name: name of the model
+        config: config of the model
+        kwargs: additional arguments to pass to the pooler
+    """
+    model_type = config.get("model_type", None)
+    if name not in ["bert", "roberta", "gpt", "bart"] and name in Pooling.SUPPORTED_POOLING[:-1]:
+        return HFPooler(config, name=name, **kwargs)
+    names = [name]
+    if model_type is not None:
+        names += [model_type]
+    if any(x in ["bert", "roberta"] for x in names):
+        return BertPooler(config, **kwargs)
+    elif any(x.startswith("gpt") for x in names):
+        return GPTPooler(config, **kwargs)
+    elif any(x == "bart" for x in names):
+        return BartPooler(config, **kwargs)
+    return None
+
+
+
+ +
+ + + +
+ +
+ +

+

Mol Format Converters

+ + +
+ + + + +
+ + + +
+ + + + + + + + +
+ + + + +

+ SmilesConverter + + +

+ + +
+ + +

Molecule line notation conversion from smiles to selfies or inchi

+ +
+ Source code in molfeat/utils/converters.py +
 5
+ 6
+ 7
+ 8
+ 9
+10
+11
+12
+13
+14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
class SmilesConverter:
+    """Molecule line notation conversion from smiles to selfies or inchi"""
+
+    SUPPORTED_LINE_NOTATIONS = [
+        "none",
+        "smiles",
+        "selfies",
+        "inchi",
+    ]
+
+    def __init__(self, target: str = None):
+        """
+        Convert input smiles to a target line notation
+
+        Args:
+            target: target representation.
+        """
+        self.target = target
+
+        if self.target is not None and self.target not in self.SUPPORTED_LINE_NOTATIONS:
+            raise ValueError(
+                f"{target} is not a supported line representation. Choose from {self.SUPPORTED_LINE_NOTATIONS}"
+            )
+
+        if self.target == "smiles" or (self.target is None or self.target == "none"):
+            self.converter = None
+        elif self.target == "inchi":
+            self.converter = types.SimpleNamespace(decode=dm.from_inchi, encode=dm.to_inchi)
+        elif self.target == "selfies":
+            self.converter = types.SimpleNamespace(decode=dm.from_selfies, encode=dm.to_selfies)
+
+    def decode(self, inp: str):
+        """Decode inputs into smiles
+
+        Args:
+            inp: input representation to decode
+        """
+        if self.converter is None:
+            return inp
+        with dm.without_rdkit_log():
+            try:
+                decoded = self.converter.decode(inp)
+                return decoded.strip()
+            except Exception:  # (deepsmiles.DecodeError, ValueError, AttributeError, IndexError):
+                return None
+
+    def encode(self, smiles: str):
+        """Encode a input smiles into target line notation
+
+        Args:
+            smiles: input smiles to encode
+        """
+        if self.converter is None:
+            return smiles
+        with dm.without_rdkit_log():
+            try:
+                encoded = self.converter.encode(smiles)
+                return encoded.strip()
+            except Exception:
+                return None
+
+
+ + + +
+ + + + + + + + + + +
+ + + + +

+ __init__(target=None) + +

+ + +
+ +

Convert input smiles to a target line notation

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
target + str + +
+

target representation.

+
+
+ None +
+ +
+ Source code in molfeat/utils/converters.py +
15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
def __init__(self, target: str = None):
+    """
+    Convert input smiles to a target line notation
+
+    Args:
+        target: target representation.
+    """
+    self.target = target
+
+    if self.target is not None and self.target not in self.SUPPORTED_LINE_NOTATIONS:
+        raise ValueError(
+            f"{target} is not a supported line representation. Choose from {self.SUPPORTED_LINE_NOTATIONS}"
+        )
+
+    if self.target == "smiles" or (self.target is None or self.target == "none"):
+        self.converter = None
+    elif self.target == "inchi":
+        self.converter = types.SimpleNamespace(decode=dm.from_inchi, encode=dm.to_inchi)
+    elif self.target == "selfies":
+        self.converter = types.SimpleNamespace(decode=dm.from_selfies, encode=dm.to_selfies)
+
+
+
+ +
+ + +
+ + + + +

+ decode(inp) + +

+ + +
+ +

Decode inputs into smiles

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
inp + str + +
+

input representation to decode

+
+
+ required +
+ +
+ Source code in molfeat/utils/converters.py +
36
+37
+38
+39
+40
+41
+42
+43
+44
+45
+46
+47
+48
+49
def decode(self, inp: str):
+    """Decode inputs into smiles
+
+    Args:
+        inp: input representation to decode
+    """
+    if self.converter is None:
+        return inp
+    with dm.without_rdkit_log():
+        try:
+            decoded = self.converter.decode(inp)
+            return decoded.strip()
+        except Exception:  # (deepsmiles.DecodeError, ValueError, AttributeError, IndexError):
+            return None
+
+
+
+ +
+ + +
+ + + + +

+ encode(smiles) + +

+ + +
+ +

Encode a input smiles into target line notation

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
smiles + str + +
+

input smiles to encode

+
+
+ required +
+ +
+ Source code in molfeat/utils/converters.py +
51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
def encode(self, smiles: str):
+    """Encode a input smiles into target line notation
+
+    Args:
+        smiles: input smiles to encode
+    """
+    if self.converter is None:
+        return smiles
+    with dm.without_rdkit_log():
+        try:
+            encoded = self.converter.encode(smiles)
+            return encoded.strip()
+        except Exception:
+            return None
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/api/molfeat.viz.html b/0.9.7/api/molfeat.viz.html new file mode 100644 index 0000000..b3b92e6 --- /dev/null +++ b/0.9.7/api/molfeat.viz.html @@ -0,0 +1,2215 @@ + + + + + + + + + + + + + + + + + + + + + + + molfeat.viz - molfeat + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ + + + + + +
+ + +
+ +
+ + + + + + + + + +
+
+ + + +
+
+
+ + + + + + + +
+
+
+ + + +
+
+
+ + + +
+
+
+ + + +
+
+ + + + + + + +

molfeat.viz

+ + +
+ + + + +
+ + + +
+ + + + + + + + + + +
+ + + + +

+ colors_from_feature_factory(feature_factory, cmap_name='Set1', alpha=1.0) + +

+ + +
+ +

Get a list of colors for a given feature factory. For the same +feature_factory the returned colors will be the same.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
feature_factory + MolChemicalFeatureFactory + +
+

Feature factory to use.

+
+
+ required +
cmap_name + str + +
+

Matplotlib colormap name.

+
+
+ 'Set1' +
alpha + float + +
+

Alpha value for the colors.

+
+
+ 1.0 +
+ + + +

Returns:

+ + + + + + + + + + + + + +
Name TypeDescription
colors + +
+

Dict of feature_name as keys and colors as values.

+
+
+ +
+ Source code in molfeat/viz.py +
14
+15
+16
+17
+18
+19
+20
+21
+22
+23
+24
+25
+26
+27
+28
+29
+30
+31
+32
+33
+34
+35
+36
+37
+38
+39
+40
+41
def colors_from_feature_factory(
+    feature_factory: rdMolChemicalFeatures.MolChemicalFeatureFactory,
+    cmap_name: str = "Set1",
+    alpha: float = 1.0,
+):
+    """Get a list of colors for a given feature factory. For the same
+    `feature_factory` the returned colors will be the same.
+
+    Args:
+        feature_factory: Feature factory to use.
+        cmap_name: Matplotlib colormap name.
+        alpha: Alpha value for the colors.
+
+    Returns:
+        colors: Dict of feature_name as keys and colors as values.
+    """
+    cmap_name = "Set1"
+
+    cmap = matplotlib.cm.get_cmap(cmap_name)
+    cmap_n = cmap.N  # type: ignore
+
+    colors = {}
+    for i, name in enumerate(feature_factory.GetFeatureFamilies()):
+        color: List[float] = list(cmap(i % cmap_n))
+        color[3] = alpha
+        colors[name] = color
+
+    return colors
+
+
+
+ +
+ + +
+ + + + +

+ show_mols(mols) + +

+ + +
+ +

Generate a view of the molecules.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + Union[Mol, List[Mol]] + +
+

A mol or a list of mols.

+
+
+ required +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

nglview.widget.NGLWidget

+
+
+ +
+ Source code in molfeat/viz.py +
44
+45
+46
+47
+48
+49
+50
+51
+52
+53
+54
+55
+56
+57
+58
+59
+60
+61
+62
+63
+64
+65
def show_mols(mols: Union[dm.Mol, List[dm.Mol]]):
+    """Generate a view of the molecules.
+
+    Args:
+        mols: A mol or a list of mols.
+
+    Returns:
+        nglview.widget.NGLWidget
+    """
+
+    import nglview as nv
+
+    if isinstance(mols, dm.Mol):
+        mols = [mols]
+
+    view = nv.NGLWidget()
+    for mol in mols:
+        component = view.add_component(mol)
+        component.clear()  # type: ignore
+        component.add_ball_and_stick(multipleBond=True)  # type: ignore
+
+    return view
+
+
+
+ +
+ + +
+ + + + +

+ show_pharm_features(mols, features, feature_factory, alpha=1.0, sphere_radius=0.4, show_legend=True) + +

+ + +
+ +

Generate a view of the molecules with pharmacophoric features.

+ + + +

Parameters:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
NameTypeDescriptionDefault
mols + Union[Mol, List[Mol]] + +
+

A mol or a list of mols.

+
+
+ required +
features + DataFrame + +
+

Features data. Columns must contain at least +"feature_name", "feature_id", and "feature_coords".

+
+
+ required +
feature_factory + MolChemicalFeatureFactory + +
+

Feature factory to display consistent colors.

+
+
+ required +
alpha + float + +
+

Alpha value for the colors (currently not working).

+
+
+ 1.0 +
sphere_radius + float + +
+

Radius of the spheres for the features.

+
+
+ 0.4 +
show_legend + bool + +
+

Display the legend (the layout is bad but at least it +shows the legend).

+
+
+ True +
+ + + +

Returns:

+ + + + + + + + + + + + + +
TypeDescription
+ +
+

nglview.widget.NGLWidget

+
+
+ +
+ Source code in molfeat/viz.py +
 68
+ 69
+ 70
+ 71
+ 72
+ 73
+ 74
+ 75
+ 76
+ 77
+ 78
+ 79
+ 80
+ 81
+ 82
+ 83
+ 84
+ 85
+ 86
+ 87
+ 88
+ 89
+ 90
+ 91
+ 92
+ 93
+ 94
+ 95
+ 96
+ 97
+ 98
+ 99
+100
+101
+102
+103
+104
+105
+106
+107
+108
+109
+110
+111
+112
+113
+114
+115
+116
+117
+118
+119
def show_pharm_features(
+    mols: Union[dm.Mol, List[dm.Mol]],
+    features: pd.DataFrame,
+    feature_factory: rdMolChemicalFeatures.MolChemicalFeatureFactory,
+    alpha: float = 1.0,
+    sphere_radius: float = 0.4,
+    show_legend: bool = True,
+):
+    """Generate a view of the molecules with pharmacophoric features.
+
+    Args:
+        mols: A mol or a list of mols.
+        features: Features data. Columns must contain at least
+            "feature_name", "feature_id", and "feature_coords".
+        feature_factory: Feature factory to display consistent colors.
+        alpha: Alpha value for the colors (currently not working).
+        sphere_radius: Radius of the spheres for the features.
+        show_legend: Display the legend (the layout is bad but at least it
+            shows the legend).
+
+    Returns:
+        nglview.widget.NGLWidget
+    """
+
+    import ipywidgets as ipy
+
+    # Get mols view
+    mol_view = show_mols(mols)
+
+    # Get colors
+    colors = colors_from_feature_factory(feature_factory, alpha=alpha)
+
+    # Add features to the viz
+    for _, row in features.iterrows():
+        color = colors[row["feature_name"]]
+        label = f"{row['feature_name']}_{row['feature_id']}"
+        mol_view.shape.add_sphere(row["coords"], color, sphere_radius, label)  # type: ignore
+
+    if not show_legend:
+        return mol_view
+
+    # Build legend widget
+    colors_widget = _build_colors_widget(colors)
+
+    main_layout = ipy.Layout(
+        display="flex",
+        flex_flow="column",
+        align_content="center",
+    )
+    main_widget = ipy.HBox([mol_view, colors_widget], layout=main_layout)  # type: ignore
+
+    return main_widget
+
+
+
+ +
+ + + +
+ +
+ +
+ + + + + + + + + + + + + +
+
+ + + +
+ + + +
+ + + +
+
+
+
+ + + + + + + + + + + + \ No newline at end of file diff --git a/0.9.7/assets/_mkdocstrings.css b/0.9.7/assets/_mkdocstrings.css new file mode 100644 index 0000000..049a254 --- /dev/null +++ b/0.9.7/assets/_mkdocstrings.css @@ -0,0 +1,64 @@ + +/* Avoid breaking parameter names, etc. in table cells. */ +.doc-contents td code { + word-break: normal !important; +} + +/* No line break before first paragraph of descriptions. */ +.doc-md-description, +.doc-md-description>p:first-child { + display: inline; +} + +/* Max width for docstring sections tables. */ +.doc .md-typeset__table, +.doc .md-typeset__table table { + display: table !important; + width: 100%; +} + +.doc .md-typeset__table tr { + display: table-row; +} + +/* Defaults in Spacy table style. */ +.doc-param-default { + float: right; +} + +/* Keep headings consistent. */ +h1.doc-heading, +h2.doc-heading, +h3.doc-heading, +h4.doc-heading, +h5.doc-heading, +h6.doc-heading { + font-weight: 400; + line-height: 1.5; + color: inherit; + text-transform: none; +} + +h1.doc-heading { + font-size: 1.6rem; +} + +h2.doc-heading { + font-size: 1.2rem; +} + +h3.doc-heading { + font-size: 1.15rem; +} + +h4.doc-heading { + font-size: 1.10rem; +} + +h5.doc-heading { + font-size: 1.05rem; +} + +h6.doc-heading { + font-size: 1rem; +} \ No newline at end of file diff --git a/0.9.7/assets/css/custom-molfeat.css b/0.9.7/assets/css/custom-molfeat.css new file mode 100644 index 0000000..7c89bc7 --- /dev/null +++ b/0.9.7/assets/css/custom-molfeat.css @@ -0,0 +1,103 @@ +:root { + --molfeat-primary: #217EBB; + --molfeat-secondary: #5f6d7a; + + /* Primary color shades */ + --md-primary-fg-color: var(--molfeat-primary); + --md-primary-fg-color--light: var(--molfeat-primary); + --md-primary-fg-color--dark: var(--molfeat-primary); + --md-primary-bg-color: var(--molfeat-secondary); + --md-primary-bg-color--light: var(--molfeat-secondary); + --md-text-link-color: var(--molfeat-secondary); + + /* Accent color shades */ + --md-accent-fg-color: 
var(--molfeat-secondary); + --md-accent-fg-color--transparent: var(--molfeat-secondary); + --md-accent-bg-color: var(--molfeat-secondary); + --md-accent-bg-color--light: var(--molfeat-secondary); +} + +:root>* { + /* Code block color shades */ + --md-code-bg-color: hsla(0, 0%, 96%, 1); + --md-code-fg-color: hsla(200, 18%, 26%, 1); + + /* Footer */ + --md-footer-bg-color: var(--molfeat-primary); + /* --md-footer-bg-color--dark: hsla(0, 0%, 0%, 0.32); */ + --md-footer-fg-color: var(--molfeat-secondary); + --md-footer-fg-color--light: var(--molfeat-secondary); + --md-footer-fg-color--lighter: var(--molfeat-secondary); + +} + +.md-header { + background-image: linear-gradient(to right, #1E2F6C, #217EBB); +} + +.md-footer { + background-image: linear-gradient(to right, #1E2F6C, #217EBB); +} + +.md-tabs { + background-image: linear-gradient(to right, #F4F6F9, #CCE3f8); +} + +.md-header__topic { + color: rgb(255, 255, 255); +} + +.md-source__repository, +.md-source__icon, +.md-header__button, +.md-nav__title, +.md-search__input, +.md-search__input::placeholder, +.md-search__input~.md-search__icon, +.md-footer__inner.md-grid, +.md-copyright__highlight, +.md-copyright, +.md-footer-meta.md-typeset a, +.md-version { + color: rgb(255, 255, 255) !important; +} + +.md-search__form { + background-color: rgba(255, 255, 255, 0.4); +} + +.md-search-result__article:hover { + background-color: #CCE3f8; +} + +.md-search-result__more:hover, +.md-search-result__more:focus { + background-color: #CCE3f8 !important; +} + +.md-search__input { + color: #222222 !important; +} + +.md-header__topic { + color: rgb(255, 255, 255); + font-size: 1.4em; +} + +/* Increase the size of the logo */ +.md-header__button.md-logo img, +.md-header__button.md-logo svg { + height: 2rem !important; +} + +/* Reduce the margin around the logo */ +.md-header__button.md-logo { + margin: 0.4em; + padding: 0.4em; +} + +/* Remove the `In` and `Out` block in rendered Jupyter notebooks */ +.md-container 
.jp-Cell-outputWrapper .jp-OutputPrompt.jp-OutputArea-prompt, +.md-container .jp-Cell-inputWrapper .jp-InputPrompt.jp-InputArea-prompt { + display: none !important; +} \ No newline at end of file diff --git a/0.9.7/assets/css/custom.css b/0.9.7/assets/css/custom.css new file mode 100644 index 0000000..54950fb --- /dev/null +++ b/0.9.7/assets/css/custom.css @@ -0,0 +1,33 @@ +/* Indentation. */ +div.doc-contents:not(.first) { + padding-left: 25px; + border-left: 4px solid rgba(230, 230, 230); + margin-bottom: 80px; +} + +/* Don't capitalize names. */ +h5.doc-heading { + text-transform: none !important; +} + +/* Don't use vertical space on hidden ToC entries. */ +.hidden-toc::before { + margin-top: 0 !important; + padding-top: 0 !important; +} + +/* Don't show permalink of hidden ToC entries. */ +.hidden-toc a.headerlink { + display: none; +} + +/* Avoid breaking parameters name, etc. in table cells. */ +td code { + word-break: normal !important; +} + +/* For pieces of Markdown rendered in table cells. 
*/ +td p { + margin-top: 0 !important; + margin-bottom: 0 !important; +} \ No newline at end of file diff --git a/0.9.7/assets/css/tweak-width.css b/0.9.7/assets/css/tweak-width.css new file mode 100644 index 0000000..8ffb12b --- /dev/null +++ b/0.9.7/assets/css/tweak-width.css @@ -0,0 +1,16 @@ +@media only screen and (min-width: 76.25em) { + .md-main__inner { + max-width: none; + padding-left: 2em; + padding-left: 2em; + } + .md-sidebar--primary { + left: 0; + } + .md-sidebar--secondary { + right: 0; + margin-left: 0; + -webkit-transform: none; + transform: none; + } +} \ No newline at end of file diff --git a/0.9.7/assets/images/favicon.png b/0.9.7/assets/images/favicon.png new file mode 100644 index 0000000000000000000000000000000000000000..1cf13b9f9d978896599290a74f77d5dbe7d1655c GIT binary patch literal 1870 zcmV-U2eJ5xP)Gc)JR9QMau)O=X#!i9;T z37kk-upj^(fsR36MHs_+1RCI)NNu9}lD0S{B^g8PN?Ww(5|~L#Ng*g{WsqleV}|#l zz8@ri&cTzw_h33bHI+12+kK6WN$h#n5cD8OQt`5kw6p~9H3()bUQ8OS4Q4HTQ=1Ol z_JAocz`fLbT2^{`8n~UAo=#AUOf=SOq4pYkt;XbC&f#7lb$*7=$na!mWCQ`dBQsO0 zLFBSPj*N?#u5&pf2t4XjEGH|=pPQ8xh7tpx;US5Cx_Ju;!O`ya-yF`)b%TEt5>eP1ZX~}sjjA%FJF?h7cX8=b!DZl<6%Cv z*G0uvvU+vmnpLZ2paivG-(cd*y3$hCIcsZcYOGh{$&)A6*XX&kXZd3G8m)G$Zz-LV z^GF3VAW^Mdv!)4OM8EgqRiz~*Cji;uzl2uC9^=8I84vNp;ltJ|q-*uQwGp2ma6cY7 z;`%`!9UXO@fr&Ebapfs34OmS9^u6$)bJxrucutf>`dKPKT%%*d3XlFVKunp9 zasduxjrjs>f8V=D|J=XNZp;_Zy^WgQ$9WDjgY=z@stwiEBm9u5*|34&1Na8BMjjgf3+SHcr`5~>oz1Y?SW^=K z^bTyO6>Gar#P_W2gEMwq)ot3; zREHn~U&Dp0l6YT0&k-wLwYjb?5zGK`W6S2v+K>AM(95m2C20L|3m~rN8dprPr@t)5lsk9Hu*W z?pS990s;Ez=+Rj{x7p``4>+c0G5^pYnB1^!TL=(?HLHZ+HicG{~4F1d^5Awl_2!1jICM-!9eoLhbbT^;yHcefyTAaqRcY zmuctDopPT!%k+}x%lZRKnzykr2}}XfG_ne?nRQO~?%hkzo;@RN{P6o`&mMUWBYMTe z6i8ChtjX&gXl`nvrU>jah)2iNM%JdjqoaeaU%yVn!^70x-flljp6Q5tK}5}&X8&&G zX3fpb3E(!rH=zVI_9Gjl45w@{(ITqngWFe7@9{mX;tO25Z_8 zQHEpI+FkTU#4xu>RkN>b3Tnc3UpWzPXWm#o55GKF09j^Mh~)K7{QqbO_~(@CVq! 
zS<8954|P8mXN2MRs86xZ&Q4EfM@JB94b=(YGuk)s&^jiSF=t3*oNK3`rD{H`yQ?d; ztE=laAUoZx5?RC8*WKOj`%LXEkgDd>&^Q4M^z`%u0rg-It=hLCVsq!Z%^6eB-OvOT zFZ28TN&cRmgU}Elrnk43)!>Z1FCPL2K$7}gwzIc48NX}#!A1BpJP?#v5wkNprhV** z?Cpalt1oH&{r!o3eSKc&ap)iz2BTn_VV`4>9M^b3;(YY}4>#ML6{~(4mH+?%07*qo IM6N<$f(jP3KmY&$ literal 0 HcmV?d00001 diff --git a/0.9.7/assets/javascripts/bundle.d7c377c4.min.js b/0.9.7/assets/javascripts/bundle.d7c377c4.min.js new file mode 100644 index 0000000..6a0bcf8 --- /dev/null +++ b/0.9.7/assets/javascripts/bundle.d7c377c4.min.js @@ -0,0 +1,29 @@ +"use strict";(()=>{var Mi=Object.create;var gr=Object.defineProperty;var Li=Object.getOwnPropertyDescriptor;var _i=Object.getOwnPropertyNames,Ft=Object.getOwnPropertySymbols,Ai=Object.getPrototypeOf,xr=Object.prototype.hasOwnProperty,ro=Object.prototype.propertyIsEnumerable;var to=(e,t,r)=>t in e?gr(e,t,{enumerable:!0,configurable:!0,writable:!0,value:r}):e[t]=r,P=(e,t)=>{for(var r in t||(t={}))xr.call(t,r)&&to(e,r,t[r]);if(Ft)for(var r of Ft(t))ro.call(t,r)&&to(e,r,t[r]);return e};var oo=(e,t)=>{var r={};for(var o in e)xr.call(e,o)&&t.indexOf(o)<0&&(r[o]=e[o]);if(e!=null&&Ft)for(var o of Ft(e))t.indexOf(o)<0&&ro.call(e,o)&&(r[o]=e[o]);return r};var yr=(e,t)=>()=>(t||e((t={exports:{}}).exports,t),t.exports);var Ci=(e,t,r,o)=>{if(t&&typeof t=="object"||typeof t=="function")for(let n of _i(t))!xr.call(e,n)&&n!==r&&gr(e,n,{get:()=>t[n],enumerable:!(o=Li(t,n))||o.enumerable});return e};var jt=(e,t,r)=>(r=e!=null?Mi(Ai(e)):{},Ci(t||!e||!e.__esModule?gr(r,"default",{value:e,enumerable:!0}):r,e));var no=(e,t,r)=>new Promise((o,n)=>{var i=c=>{try{a(r.next(c))}catch(p){n(p)}},s=c=>{try{a(r.throw(c))}catch(p){n(p)}},a=c=>c.done?o(c.value):Promise.resolve(c.value).then(i,s);a((r=r.apply(e,t)).next())});var ao=yr((Er,io)=>{(function(e,t){typeof Er=="object"&&typeof io!="undefined"?t():typeof define=="function"&&define.amd?define(t):t()})(Er,function(){"use strict";function e(r){var 
o=!0,n=!1,i=null,s={text:!0,search:!0,url:!0,tel:!0,email:!0,password:!0,number:!0,date:!0,month:!0,week:!0,time:!0,datetime:!0,"datetime-local":!0};function a(C){return!!(C&&C!==document&&C.nodeName!=="HTML"&&C.nodeName!=="BODY"&&"classList"in C&&"contains"in C.classList)}function c(C){var ct=C.type,Ve=C.tagName;return!!(Ve==="INPUT"&&s[ct]&&!C.readOnly||Ve==="TEXTAREA"&&!C.readOnly||C.isContentEditable)}function p(C){C.classList.contains("focus-visible")||(C.classList.add("focus-visible"),C.setAttribute("data-focus-visible-added",""))}function l(C){C.hasAttribute("data-focus-visible-added")&&(C.classList.remove("focus-visible"),C.removeAttribute("data-focus-visible-added"))}function f(C){C.metaKey||C.altKey||C.ctrlKey||(a(r.activeElement)&&p(r.activeElement),o=!0)}function u(C){o=!1}function d(C){a(C.target)&&(o||c(C.target))&&p(C.target)}function y(C){a(C.target)&&(C.target.classList.contains("focus-visible")||C.target.hasAttribute("data-focus-visible-added"))&&(n=!0,window.clearTimeout(i),i=window.setTimeout(function(){n=!1},100),l(C.target))}function b(C){document.visibilityState==="hidden"&&(n&&(o=!0),D())}function D(){document.addEventListener("mousemove",J),document.addEventListener("mousedown",J),document.addEventListener("mouseup",J),document.addEventListener("pointermove",J),document.addEventListener("pointerdown",J),document.addEventListener("pointerup",J),document.addEventListener("touchmove",J),document.addEventListener("touchstart",J),document.addEventListener("touchend",J)}function Q(){document.removeEventListener("mousemove",J),document.removeEventListener("mousedown",J),document.removeEventListener("mouseup",J),document.removeEventListener("pointermove",J),document.removeEventListener("pointerdown",J),document.removeEventListener("pointerup",J),document.removeEventListener("touchmove",J),document.removeEventListener("touchstart",J),document.removeEventListener("touchend",J)}function 
J(C){C.target.nodeName&&C.target.nodeName.toLowerCase()==="html"||(o=!1,Q())}document.addEventListener("keydown",f,!0),document.addEventListener("mousedown",u,!0),document.addEventListener("pointerdown",u,!0),document.addEventListener("touchstart",u,!0),document.addEventListener("visibilitychange",b,!0),D(),r.addEventListener("focus",d,!0),r.addEventListener("blur",y,!0),r.nodeType===Node.DOCUMENT_FRAGMENT_NODE&&r.host?r.host.setAttribute("data-js-focus-visible",""):r.nodeType===Node.DOCUMENT_NODE&&(document.documentElement.classList.add("js-focus-visible"),document.documentElement.setAttribute("data-js-focus-visible",""))}if(typeof window!="undefined"&&typeof document!="undefined"){window.applyFocusVisiblePolyfill=e;var t;try{t=new CustomEvent("focus-visible-polyfill-ready")}catch(r){t=document.createEvent("CustomEvent"),t.initCustomEvent("focus-visible-polyfill-ready",!1,!1,{})}window.dispatchEvent(t)}typeof document!="undefined"&&e(document)})});var Kr=yr((kt,qr)=>{/*! + * clipboard.js v2.0.11 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */(function(t,r){typeof kt=="object"&&typeof qr=="object"?qr.exports=r():typeof define=="function"&&define.amd?define([],r):typeof kt=="object"?kt.ClipboardJS=r():t.ClipboardJS=r()})(kt,function(){return function(){var e={686:function(o,n,i){"use strict";i.d(n,{default:function(){return Oi}});var s=i(279),a=i.n(s),c=i(370),p=i.n(c),l=i(817),f=i.n(l);function u(V){try{return document.execCommand(V)}catch(_){return!1}}var d=function(_){var O=f()(_);return u("cut"),O},y=d;function b(V){var _=document.documentElement.getAttribute("dir")==="rtl",O=document.createElement("textarea");O.style.fontSize="12pt",O.style.border="0",O.style.padding="0",O.style.margin="0",O.style.position="absolute",O.style[_?"right":"left"]="-9999px";var $=window.pageYOffset||document.documentElement.scrollTop;return O.style.top="".concat($,"px"),O.setAttribute("readonly",""),O.value=V,O}var D=function(_,O){var 
$=b(_);O.container.appendChild($);var N=f()($);return u("copy"),$.remove(),N},Q=function(_){var O=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body},$="";return typeof _=="string"?$=D(_,O):_ instanceof HTMLInputElement&&!["text","search","url","tel","password"].includes(_==null?void 0:_.type)?$=D(_.value,O):($=f()(_),u("copy")),$},J=Q;function C(V){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?C=function(O){return typeof O}:C=function(O){return O&&typeof Symbol=="function"&&O.constructor===Symbol&&O!==Symbol.prototype?"symbol":typeof O},C(V)}var ct=function(){var _=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{},O=_.action,$=O===void 0?"copy":O,N=_.container,Y=_.target,ke=_.text;if($!=="copy"&&$!=="cut")throw new Error('Invalid "action" value, use either "copy" or "cut"');if(Y!==void 0)if(Y&&C(Y)==="object"&&Y.nodeType===1){if($==="copy"&&Y.hasAttribute("disabled"))throw new Error('Invalid "target" attribute. Please use "readonly" instead of "disabled" attribute');if($==="cut"&&(Y.hasAttribute("readonly")||Y.hasAttribute("disabled")))throw new Error(`Invalid "target" attribute. 
You can't cut text from elements with "readonly" or "disabled" attributes`)}else throw new Error('Invalid "target" value, use a valid Element');if(ke)return J(ke,{container:N});if(Y)return $==="cut"?y(Y):J(Y,{container:N})},Ve=ct;function Fe(V){"@babel/helpers - typeof";return typeof Symbol=="function"&&typeof Symbol.iterator=="symbol"?Fe=function(O){return typeof O}:Fe=function(O){return O&&typeof Symbol=="function"&&O.constructor===Symbol&&O!==Symbol.prototype?"symbol":typeof O},Fe(V)}function vi(V,_){if(!(V instanceof _))throw new TypeError("Cannot call a class as a function")}function eo(V,_){for(var O=0;O<_.length;O++){var $=_[O];$.enumerable=$.enumerable||!1,$.configurable=!0,"value"in $&&($.writable=!0),Object.defineProperty(V,$.key,$)}}function gi(V,_,O){return _&&eo(V.prototype,_),O&&eo(V,O),V}function xi(V,_){if(typeof _!="function"&&_!==null)throw new TypeError("Super expression must either be null or a function");V.prototype=Object.create(_&&_.prototype,{constructor:{value:V,writable:!0,configurable:!0}}),_&&br(V,_)}function br(V,_){return br=Object.setPrototypeOf||function($,N){return $.__proto__=N,$},br(V,_)}function yi(V){var _=Ti();return function(){var $=Rt(V),N;if(_){var Y=Rt(this).constructor;N=Reflect.construct($,arguments,Y)}else N=$.apply(this,arguments);return Ei(this,N)}}function Ei(V,_){return _&&(Fe(_)==="object"||typeof _=="function")?_:wi(V)}function wi(V){if(V===void 0)throw new ReferenceError("this hasn't been initialised - super() hasn't been called");return V}function Ti(){if(typeof Reflect=="undefined"||!Reflect.construct||Reflect.construct.sham)return!1;if(typeof Proxy=="function")return!0;try{return Date.prototype.toString.call(Reflect.construct(Date,[],function(){})),!0}catch(V){return!1}}function Rt(V){return Rt=Object.setPrototypeOf?Object.getPrototypeOf:function(O){return O.__proto__||Object.getPrototypeOf(O)},Rt(V)}function vr(V,_){var O="data-clipboard-".concat(V);if(_.hasAttribute(O))return _.getAttribute(O)}var 
Si=function(V){xi(O,V);var _=yi(O);function O($,N){var Y;return vi(this,O),Y=_.call(this),Y.resolveOptions(N),Y.listenClick($),Y}return gi(O,[{key:"resolveOptions",value:function(){var N=arguments.length>0&&arguments[0]!==void 0?arguments[0]:{};this.action=typeof N.action=="function"?N.action:this.defaultAction,this.target=typeof N.target=="function"?N.target:this.defaultTarget,this.text=typeof N.text=="function"?N.text:this.defaultText,this.container=Fe(N.container)==="object"?N.container:document.body}},{key:"listenClick",value:function(N){var Y=this;this.listener=p()(N,"click",function(ke){return Y.onClick(ke)})}},{key:"onClick",value:function(N){var Y=N.delegateTarget||N.currentTarget,ke=this.action(Y)||"copy",It=Ve({action:ke,container:this.container,target:this.target(Y),text:this.text(Y)});this.emit(It?"success":"error",{action:ke,text:It,trigger:Y,clearSelection:function(){Y&&Y.focus(),window.getSelection().removeAllRanges()}})}},{key:"defaultAction",value:function(N){return vr("action",N)}},{key:"defaultTarget",value:function(N){var Y=vr("target",N);if(Y)return document.querySelector(Y)}},{key:"defaultText",value:function(N){return vr("text",N)}},{key:"destroy",value:function(){this.listener.destroy()}}],[{key:"copy",value:function(N){var Y=arguments.length>1&&arguments[1]!==void 0?arguments[1]:{container:document.body};return J(N,Y)}},{key:"cut",value:function(N){return y(N)}},{key:"isSupported",value:function(){var N=arguments.length>0&&arguments[0]!==void 0?arguments[0]:["copy","cut"],Y=typeof N=="string"?[N]:N,ke=!!document.queryCommandSupported;return Y.forEach(function(It){ke=ke&&!!document.queryCommandSupported(It)}),ke}}]),O}(a()),Oi=Si},828:function(o){var n=9;if(typeof Element!="undefined"&&!Element.prototype.matches){var i=Element.prototype;i.matches=i.matchesSelector||i.mozMatchesSelector||i.msMatchesSelector||i.oMatchesSelector||i.webkitMatchesSelector}function s(a,c){for(;a&&a.nodeType!==n;){if(typeof 
a.matches=="function"&&a.matches(c))return a;a=a.parentNode}}o.exports=s},438:function(o,n,i){var s=i(828);function a(l,f,u,d,y){var b=p.apply(this,arguments);return l.addEventListener(u,b,y),{destroy:function(){l.removeEventListener(u,b,y)}}}function c(l,f,u,d,y){return typeof l.addEventListener=="function"?a.apply(null,arguments):typeof u=="function"?a.bind(null,document).apply(null,arguments):(typeof l=="string"&&(l=document.querySelectorAll(l)),Array.prototype.map.call(l,function(b){return a(b,f,u,d,y)}))}function p(l,f,u,d){return function(y){y.delegateTarget=s(y.target,f),y.delegateTarget&&d.call(l,y)}}o.exports=c},879:function(o,n){n.node=function(i){return i!==void 0&&i instanceof HTMLElement&&i.nodeType===1},n.nodeList=function(i){var s=Object.prototype.toString.call(i);return i!==void 0&&(s==="[object NodeList]"||s==="[object HTMLCollection]")&&"length"in i&&(i.length===0||n.node(i[0]))},n.string=function(i){return typeof i=="string"||i instanceof String},n.fn=function(i){var s=Object.prototype.toString.call(i);return s==="[object Function]"}},370:function(o,n,i){var s=i(879),a=i(438);function c(u,d,y){if(!u&&!d&&!y)throw new Error("Missing required arguments");if(!s.string(d))throw new TypeError("Second argument must be a String");if(!s.fn(y))throw new TypeError("Third argument must be a Function");if(s.node(u))return p(u,d,y);if(s.nodeList(u))return l(u,d,y);if(s.string(u))return f(u,d,y);throw new TypeError("First argument must be a String, HTMLElement, HTMLCollection, or NodeList")}function p(u,d,y){return u.addEventListener(d,y),{destroy:function(){u.removeEventListener(d,y)}}}function l(u,d,y){return Array.prototype.forEach.call(u,function(b){b.addEventListener(d,y)}),{destroy:function(){Array.prototype.forEach.call(u,function(b){b.removeEventListener(d,y)})}}}function f(u,d,y){return a(document.body,u,d,y)}o.exports=c},817:function(o){function n(i){var s;if(i.nodeName==="SELECT")i.focus(),s=i.value;else 
if(i.nodeName==="INPUT"||i.nodeName==="TEXTAREA"){var a=i.hasAttribute("readonly");a||i.setAttribute("readonly",""),i.select(),i.setSelectionRange(0,i.value.length),a||i.removeAttribute("readonly"),s=i.value}else{i.hasAttribute("contenteditable")&&i.focus();var c=window.getSelection(),p=document.createRange();p.selectNodeContents(i),c.removeAllRanges(),c.addRange(p),s=c.toString()}return s}o.exports=n},279:function(o){function n(){}n.prototype={on:function(i,s,a){var c=this.e||(this.e={});return(c[i]||(c[i]=[])).push({fn:s,ctx:a}),this},once:function(i,s,a){var c=this;function p(){c.off(i,p),s.apply(a,arguments)}return p._=s,this.on(i,p,a)},emit:function(i){var s=[].slice.call(arguments,1),a=((this.e||(this.e={}))[i]||[]).slice(),c=0,p=a.length;for(c;c{"use strict";/*! + * escape-html + * Copyright(c) 2012-2013 TJ Holowaychuk + * Copyright(c) 2015 Andreas Lubbe + * Copyright(c) 2015 Tiancheng "Timothy" Gu + * MIT Licensed + */var Wa=/["'&<>]/;Vn.exports=Ua;function Ua(e){var t=""+e,r=Wa.exec(t);if(!r)return t;var o,n="",i=0,s=0;for(i=r.index;i0&&i[i.length-1])&&(p[0]===6||p[0]===2)){r=0;continue}if(p[0]===3&&(!i||p[1]>i[0]&&p[1]=e.length&&(e=void 0),{value:e&&e[o++],done:!e}}};throw new TypeError(t?"Object is not iterable.":"Symbol.iterator is not defined.")}function z(e,t){var r=typeof Symbol=="function"&&e[Symbol.iterator];if(!r)return e;var o=r.call(e),n,i=[],s;try{for(;(t===void 0||t-- >0)&&!(n=o.next()).done;)i.push(n.value)}catch(a){s={error:a}}finally{try{n&&!n.done&&(r=o.return)&&r.call(o)}finally{if(s)throw s.error}}return i}function K(e,t,r){if(r||arguments.length===2)for(var o=0,n=t.length,i;o1||a(u,d)})})}function a(u,d){try{c(o[u](d))}catch(y){f(i[0][3],y)}}function c(u){u.value instanceof ot?Promise.resolve(u.value.v).then(p,l):f(i[0][2],u)}function p(u){a("next",u)}function l(u){a("throw",u)}function f(u,d){u(d),i.shift(),i.length&&a(i[0][0],i[0][1])}}function po(e){if(!Symbol.asyncIterator)throw new TypeError("Symbol.asyncIterator is not 
defined.");var t=e[Symbol.asyncIterator],r;return t?t.call(e):(e=typeof be=="function"?be(e):e[Symbol.iterator](),r={},o("next"),o("throw"),o("return"),r[Symbol.asyncIterator]=function(){return this},r);function o(i){r[i]=e[i]&&function(s){return new Promise(function(a,c){s=e[i](s),n(a,c,s.done,s.value)})}}function n(i,s,a,c){Promise.resolve(c).then(function(p){i({value:p,done:a})},s)}}function k(e){return typeof e=="function"}function pt(e){var t=function(o){Error.call(o),o.stack=new Error().stack},r=e(t);return r.prototype=Object.create(Error.prototype),r.prototype.constructor=r,r}var Ut=pt(function(e){return function(r){e(this),this.message=r?r.length+` errors occurred during unsubscription: +`+r.map(function(o,n){return n+1+") "+o.toString()}).join(` + `):"",this.name="UnsubscriptionError",this.errors=r}});function ze(e,t){if(e){var r=e.indexOf(t);0<=r&&e.splice(r,1)}}var je=function(){function e(t){this.initialTeardown=t,this.closed=!1,this._parentage=null,this._finalizers=null}return e.prototype.unsubscribe=function(){var t,r,o,n,i;if(!this.closed){this.closed=!0;var s=this._parentage;if(s)if(this._parentage=null,Array.isArray(s))try{for(var a=be(s),c=a.next();!c.done;c=a.next()){var p=c.value;p.remove(this)}}catch(b){t={error:b}}finally{try{c&&!c.done&&(r=a.return)&&r.call(a)}finally{if(t)throw t.error}}else s.remove(this);var l=this.initialTeardown;if(k(l))try{l()}catch(b){i=b instanceof Ut?b.errors:[b]}var f=this._finalizers;if(f){this._finalizers=null;try{for(var u=be(f),d=u.next();!d.done;d=u.next()){var y=d.value;try{lo(y)}catch(b){i=i!=null?i:[],b instanceof Ut?i=K(K([],z(i)),z(b.errors)):i.push(b)}}}catch(b){o={error:b}}finally{try{d&&!d.done&&(n=u.return)&&n.call(u)}finally{if(o)throw o.error}}}if(i)throw new Ut(i)}},e.prototype.add=function(t){var r;if(t&&t!==this)if(this.closed)lo(t);else{if(t instanceof e){if(t.closed||t._hasParent(this))return;t._addParent(this)}(this._finalizers=(r=this._finalizers)!==null&&r!==void 
0?r:[]).push(t)}},e.prototype._hasParent=function(t){var r=this._parentage;return r===t||Array.isArray(r)&&r.includes(t)},e.prototype._addParent=function(t){var r=this._parentage;this._parentage=Array.isArray(r)?(r.push(t),r):r?[r,t]:t},e.prototype._removeParent=function(t){var r=this._parentage;r===t?this._parentage=null:Array.isArray(r)&&ze(r,t)},e.prototype.remove=function(t){var r=this._finalizers;r&&ze(r,t),t instanceof e&&t._removeParent(this)},e.EMPTY=function(){var t=new e;return t.closed=!0,t}(),e}();var Tr=je.EMPTY;function Nt(e){return e instanceof je||e&&"closed"in e&&k(e.remove)&&k(e.add)&&k(e.unsubscribe)}function lo(e){k(e)?e():e.unsubscribe()}var He={onUnhandledError:null,onStoppedNotification:null,Promise:void 0,useDeprecatedSynchronousErrorHandling:!1,useDeprecatedNextContext:!1};var lt={setTimeout:function(e,t){for(var r=[],o=2;o0},enumerable:!1,configurable:!0}),t.prototype._trySubscribe=function(r){return this._throwIfClosed(),e.prototype._trySubscribe.call(this,r)},t.prototype._subscribe=function(r){return this._throwIfClosed(),this._checkFinalizedStatuses(r),this._innerSubscribe(r)},t.prototype._innerSubscribe=function(r){var o=this,n=this,i=n.hasError,s=n.isStopped,a=n.observers;return i||s?Tr:(this.currentObservers=null,a.push(r),new je(function(){o.currentObservers=null,ze(a,r)}))},t.prototype._checkFinalizedStatuses=function(r){var o=this,n=o.hasError,i=o.thrownError,s=o.isStopped;n?r.error(i):s&&r.complete()},t.prototype.asObservable=function(){var r=new I;return r.source=this,r},t.create=function(r,o){return new xo(r,o)},t}(I);var xo=function(e){se(t,e);function t(r,o){var n=e.call(this)||this;return n.destination=r,n.source=o,n}return t.prototype.next=function(r){var o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.next)===null||n===void 0||n.call(o,r)},t.prototype.error=function(r){var o,n;(n=(o=this.destination)===null||o===void 0?void 0:o.error)===null||n===void 0||n.call(o,r)},t.prototype.complete=function(){var 
r,o;(o=(r=this.destination)===null||r===void 0?void 0:r.complete)===null||o===void 0||o.call(r)},t.prototype._subscribe=function(r){var o,n;return(n=(o=this.source)===null||o===void 0?void 0:o.subscribe(r))!==null&&n!==void 0?n:Tr},t}(x);var St={now:function(){return(St.delegate||Date).now()},delegate:void 0};var Ot=function(e){se(t,e);function t(r,o,n){r===void 0&&(r=1/0),o===void 0&&(o=1/0),n===void 0&&(n=St);var i=e.call(this)||this;return i._bufferSize=r,i._windowTime=o,i._timestampProvider=n,i._buffer=[],i._infiniteTimeWindow=!0,i._infiniteTimeWindow=o===1/0,i._bufferSize=Math.max(1,r),i._windowTime=Math.max(1,o),i}return t.prototype.next=function(r){var o=this,n=o.isStopped,i=o._buffer,s=o._infiniteTimeWindow,a=o._timestampProvider,c=o._windowTime;n||(i.push(r),!s&&i.push(a.now()+c)),this._trimBuffer(),e.prototype.next.call(this,r)},t.prototype._subscribe=function(r){this._throwIfClosed(),this._trimBuffer();for(var o=this._innerSubscribe(r),n=this,i=n._infiniteTimeWindow,s=n._buffer,a=s.slice(),c=0;c0?e.prototype.requestAsyncId.call(this,r,o,n):(r.actions.push(this),r._scheduled||(r._scheduled=ut.requestAnimationFrame(function(){return r.flush(void 0)})))},t.prototype.recycleAsyncId=function(r,o,n){var i;if(n===void 0&&(n=0),n!=null?n>0:this.delay>0)return e.prototype.recycleAsyncId.call(this,r,o,n);var s=r.actions;o!=null&&((i=s[s.length-1])===null||i===void 0?void 0:i.id)!==o&&(ut.cancelAnimationFrame(o),r._scheduled=void 0)},t}(zt);var wo=function(e){se(t,e);function t(){return e!==null&&e.apply(this,arguments)||this}return t.prototype.flush=function(r){this._active=!0;var o=this._scheduled;this._scheduled=void 0;var n=this.actions,i;r=r||n.shift();do if(i=r.execute(r.state,r.delay))break;while((r=n[0])&&r.id===o&&n.shift());if(this._active=!1,i){for(;(r=n[0])&&r.id===o&&n.shift();)r.unsubscribe();throw i}},t}(qt);var ge=new wo(Eo);var M=new I(function(e){return e.complete()});function Kt(e){return e&&k(e.schedule)}function Cr(e){return 
e[e.length-1]}function Ge(e){return k(Cr(e))?e.pop():void 0}function Ae(e){return Kt(Cr(e))?e.pop():void 0}function Qt(e,t){return typeof Cr(e)=="number"?e.pop():t}var dt=function(e){return e&&typeof e.length=="number"&&typeof e!="function"};function Yt(e){return k(e==null?void 0:e.then)}function Bt(e){return k(e[ft])}function Gt(e){return Symbol.asyncIterator&&k(e==null?void 0:e[Symbol.asyncIterator])}function Jt(e){return new TypeError("You provided "+(e!==null&&typeof e=="object"?"an invalid object":"'"+e+"'")+" where a stream was expected. You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.")}function Wi(){return typeof Symbol!="function"||!Symbol.iterator?"@@iterator":Symbol.iterator}var Xt=Wi();function Zt(e){return k(e==null?void 0:e[Xt])}function er(e){return co(this,arguments,function(){var r,o,n,i;return Wt(this,function(s){switch(s.label){case 0:r=e.getReader(),s.label=1;case 1:s.trys.push([1,,9,10]),s.label=2;case 2:return[4,ot(r.read())];case 3:return o=s.sent(),n=o.value,i=o.done,i?[4,ot(void 0)]:[3,5];case 4:return[2,s.sent()];case 5:return[4,ot(n)];case 6:return[4,s.sent()];case 7:return s.sent(),[3,2];case 8:return[3,10];case 9:return r.releaseLock(),[7];case 10:return[2]}})})}function tr(e){return k(e==null?void 0:e.getReader)}function F(e){if(e instanceof I)return e;if(e!=null){if(Bt(e))return Ui(e);if(dt(e))return Ni(e);if(Yt(e))return Di(e);if(Gt(e))return To(e);if(Zt(e))return Vi(e);if(tr(e))return zi(e)}throw Jt(e)}function Ui(e){return new I(function(t){var r=e[ft]();if(k(r.subscribe))return r.subscribe(t);throw new TypeError("Provided object does not correctly implement Symbol.observable")})}function Ni(e){return new I(function(t){for(var r=0;r=2;return function(o){return o.pipe(e?v(function(n,i){return e(n,i,o)}):pe,ue(1),r?$e(t):Uo(function(){return new or}))}}function Rr(e){return e<=0?function(){return M}:g(function(t,r){var o=[];t.subscribe(E(r,function(n){o.push(n),e=2,!0))}function 
de(e){e===void 0&&(e={});var t=e.connector,r=t===void 0?function(){return new x}:t,o=e.resetOnError,n=o===void 0?!0:o,i=e.resetOnComplete,s=i===void 0?!0:i,a=e.resetOnRefCountZero,c=a===void 0?!0:a;return function(p){var l,f,u,d=0,y=!1,b=!1,D=function(){f==null||f.unsubscribe(),f=void 0},Q=function(){D(),l=u=void 0,y=b=!1},J=function(){var C=l;Q(),C==null||C.unsubscribe()};return g(function(C,ct){d++,!b&&!y&&D();var Ve=u=u!=null?u:r();ct.add(function(){d--,d===0&&!b&&!y&&(f=jr(J,c))}),Ve.subscribe(ct),!l&&d>0&&(l=new it({next:function(Fe){return Ve.next(Fe)},error:function(Fe){b=!0,D(),f=jr(Q,n,Fe),Ve.error(Fe)},complete:function(){y=!0,D(),f=jr(Q,s),Ve.complete()}}),F(C).subscribe(l))})(p)}}function jr(e,t){for(var r=[],o=2;oe.next(document)),e}function W(e,t=document){return Array.from(t.querySelectorAll(e))}function U(e,t=document){let r=ce(e,t);if(typeof r=="undefined")throw new ReferenceError(`Missing element: expected "${e}" to be present`);return r}function ce(e,t=document){return t.querySelector(e)||void 0}function Ie(){return document.activeElement instanceof HTMLElement&&document.activeElement||void 0}var ca=L(h(document.body,"focusin"),h(document.body,"focusout")).pipe(ye(1),q(void 0),m(()=>Ie()||document.body),Z(1));function vt(e){return ca.pipe(m(t=>e.contains(t)),X())}function qo(e,t){return L(h(e,"mouseenter").pipe(m(()=>!0)),h(e,"mouseleave").pipe(m(()=>!1))).pipe(t?ye(t):pe,q(!1))}function Ue(e){return{x:e.offsetLeft,y:e.offsetTop}}function Ko(e){return L(h(window,"load"),h(window,"resize")).pipe(Le(0,ge),m(()=>Ue(e)),q(Ue(e)))}function ir(e){return{x:e.scrollLeft,y:e.scrollTop}}function et(e){return L(h(e,"scroll"),h(window,"resize")).pipe(Le(0,ge),m(()=>ir(e)),q(ir(e)))}function Qo(e,t){if(typeof t=="string"||typeof t=="number")e.innerHTML+=t.toString();else if(t instanceof Node)e.appendChild(t);else if(Array.isArray(t))for(let r of t)Qo(e,r)}function S(e,t,...r){let o=document.createElement(e);if(t)for(let n of Object.keys(t))typeof 
t[n]!="undefined"&&(typeof t[n]!="boolean"?o.setAttribute(n,t[n]):o.setAttribute(n,""));for(let n of r)Qo(o,n);return o}function ar(e){if(e>999){let t=+((e-950)%1e3>99);return`${((e+1e-6)/1e3).toFixed(t)}k`}else return e.toString()}function gt(e){let t=S("script",{src:e});return H(()=>(document.head.appendChild(t),L(h(t,"load"),h(t,"error").pipe(w(()=>kr(()=>new ReferenceError(`Invalid script: ${e}`))))).pipe(m(()=>{}),A(()=>document.head.removeChild(t)),ue(1))))}var Yo=new x,pa=H(()=>typeof ResizeObserver=="undefined"?gt("https://unpkg.com/resize-observer-polyfill"):R(void 0)).pipe(m(()=>new ResizeObserver(e=>{for(let t of e)Yo.next(t)})),w(e=>L(Ke,R(e)).pipe(A(()=>e.disconnect()))),Z(1));function le(e){return{width:e.offsetWidth,height:e.offsetHeight}}function Se(e){return pa.pipe(T(t=>t.observe(e)),w(t=>Yo.pipe(v(({target:r})=>r===e),A(()=>t.unobserve(e)),m(()=>le(e)))),q(le(e)))}function xt(e){return{width:e.scrollWidth,height:e.scrollHeight}}function sr(e){let t=e.parentElement;for(;t&&(e.scrollWidth<=t.scrollWidth&&e.scrollHeight<=t.scrollHeight);)t=(e=t).parentElement;return t?e:void 0}var Bo=new x,la=H(()=>R(new IntersectionObserver(e=>{for(let t of e)Bo.next(t)},{threshold:0}))).pipe(w(e=>L(Ke,R(e)).pipe(A(()=>e.disconnect()))),Z(1));function yt(e){return la.pipe(T(t=>t.observe(e)),w(t=>Bo.pipe(v(({target:r})=>r===e),A(()=>t.unobserve(e)),m(({isIntersecting:r})=>r))))}function Go(e,t=16){return et(e).pipe(m(({y:r})=>{let o=le(e),n=xt(e);return r>=n.height-o.height-t}),X())}var cr={drawer:U("[data-md-toggle=drawer]"),search:U("[data-md-toggle=search]")};function Jo(e){return cr[e].checked}function Ye(e,t){cr[e].checked!==t&&cr[e].click()}function Ne(e){let t=cr[e];return h(t,"change").pipe(m(()=>t.checked),q(t.checked))}function ma(e,t){switch(e.constructor){case HTMLInputElement:return e.type==="radio"?/^Arrow/.test(t):!0;case HTMLSelectElement:case HTMLTextAreaElement:return!0;default:return e.isContentEditable}}function fa(){return 
L(h(window,"compositionstart").pipe(m(()=>!0)),h(window,"compositionend").pipe(m(()=>!1))).pipe(q(!1))}function Xo(){let e=h(window,"keydown").pipe(v(t=>!(t.metaKey||t.ctrlKey)),m(t=>({mode:Jo("search")?"search":"global",type:t.key,claim(){t.preventDefault(),t.stopPropagation()}})),v(({mode:t,type:r})=>{if(t==="global"){let o=Ie();if(typeof o!="undefined")return!ma(o,r)}return!0}),de());return fa().pipe(w(t=>t?M:e))}function me(){return new URL(location.href)}function st(e,t=!1){if(G("navigation.instant")&&!t){let r=S("a",{href:e.href});document.body.appendChild(r),r.click(),r.remove()}else location.href=e.href}function Zo(){return new x}function en(){return location.hash.slice(1)}function pr(e){let t=S("a",{href:e});t.addEventListener("click",r=>r.stopPropagation()),t.click()}function ua(e){return L(h(window,"hashchange"),e).pipe(m(en),q(en()),v(t=>t.length>0),Z(1))}function tn(e){return ua(e).pipe(m(t=>ce(`[id="${t}"]`)),v(t=>typeof t!="undefined"))}function At(e){let t=matchMedia(e);return nr(r=>t.addListener(()=>r(t.matches))).pipe(q(t.matches))}function rn(){let e=matchMedia("print");return L(h(window,"beforeprint").pipe(m(()=>!0)),h(window,"afterprint").pipe(m(()=>!1))).pipe(q(e.matches))}function Dr(e,t){return e.pipe(w(r=>r?t():M))}function lr(e,t){return new I(r=>{let o=new XMLHttpRequest;o.open("GET",`${e}`),o.responseType="blob",o.addEventListener("load",()=>{o.status>=200&&o.status<300?(r.next(o.response),r.complete()):r.error(new Error(o.statusText))}),o.addEventListener("error",()=>{r.error(new Error("Network Error"))}),o.addEventListener("abort",()=>{r.error(new Error("Request aborted"))}),typeof(t==null?void 0:t.progress$)!="undefined"&&(o.addEventListener("progress",n=>{if(n.lengthComputable)t.progress$.next(n.loaded/n.total*100);else{let i=Number(o.getResponseHeader("Content-Length"))||0;t.progress$.next(n.loaded/i*100)}}),t.progress$.next(5)),o.send()})}function De(e,t){return lr(e,t).pipe(w(r=>r.text()),m(r=>JSON.parse(r)),Z(1))}function 
on(e,t){let r=new DOMParser;return lr(e,t).pipe(w(o=>o.text()),m(o=>r.parseFromString(o,"text/xml")),Z(1))}function nn(){return{x:Math.max(0,scrollX),y:Math.max(0,scrollY)}}function an(){return L(h(window,"scroll",{passive:!0}),h(window,"resize",{passive:!0})).pipe(m(nn),q(nn()))}function sn(){return{width:innerWidth,height:innerHeight}}function cn(){return h(window,"resize",{passive:!0}).pipe(m(sn),q(sn()))}function pn(){return B([an(),cn()]).pipe(m(([e,t])=>({offset:e,size:t})),Z(1))}function mr(e,{viewport$:t,header$:r}){let o=t.pipe(te("size")),n=B([o,r]).pipe(m(()=>Ue(e)));return B([r,t,n]).pipe(m(([{height:i},{offset:s,size:a},{x:c,y:p}])=>({offset:{x:s.x-c,y:s.y-p+i},size:a})))}function da(e){return h(e,"message",t=>t.data)}function ha(e){let t=new x;return t.subscribe(r=>e.postMessage(r)),t}function ln(e,t=new Worker(e)){let r=da(t),o=ha(t),n=new x;n.subscribe(o);let i=o.pipe(ee(),oe(!0));return n.pipe(ee(),Re(r.pipe(j(i))),de())}var ba=U("#__config"),Et=JSON.parse(ba.textContent);Et.base=`${new URL(Et.base,me())}`;function he(){return Et}function G(e){return Et.features.includes(e)}function we(e,t){return typeof t!="undefined"?Et.translations[e].replace("#",t.toString()):Et.translations[e]}function Oe(e,t=document){return U(`[data-md-component=${e}]`,t)}function ne(e,t=document){return W(`[data-md-component=${e}]`,t)}function va(e){let t=U(".md-typeset > :first-child",e);return h(t,"click",{once:!0}).pipe(m(()=>U(".md-typeset",e)),m(r=>({hash:__md_hash(r.innerHTML)})))}function mn(e){if(!G("announce.dismiss")||!e.childElementCount)return M;if(!e.hidden){let t=U(".md-typeset",e);__md_hash(t.innerHTML)===__md_get("__announce")&&(e.hidden=!0)}return H(()=>{let t=new x;return t.subscribe(({hash:r})=>{e.hidden=!0,__md_set("__announce",r)}),va(e).pipe(T(r=>t.next(r)),A(()=>t.complete()),m(r=>P({ref:e},r)))})}function ga(e,{target$:t}){return t.pipe(m(r=>({hidden:r!==e})))}function fn(e,t){let r=new x;return 
r.subscribe(({hidden:o})=>{e.hidden=o}),ga(e,t).pipe(T(o=>r.next(o)),A(()=>r.complete()),m(o=>P({ref:e},o)))}function Ct(e,t){return t==="inline"?S("div",{class:"md-tooltip md-tooltip--inline",id:e,role:"tooltip"},S("div",{class:"md-tooltip__inner md-typeset"})):S("div",{class:"md-tooltip",id:e,role:"tooltip"},S("div",{class:"md-tooltip__inner md-typeset"}))}function un(e,t){if(t=t?`${t}_annotation_${e}`:void 0,t){let r=t?`#${t}`:void 0;return S("aside",{class:"md-annotation",tabIndex:0},Ct(t),S("a",{href:r,class:"md-annotation__index",tabIndex:-1},S("span",{"data-md-annotation-id":e})))}else return S("aside",{class:"md-annotation",tabIndex:0},Ct(t),S("span",{class:"md-annotation__index",tabIndex:-1},S("span",{"data-md-annotation-id":e})))}function dn(e){return S("button",{class:"md-clipboard md-icon",title:we("clipboard.copy"),"data-clipboard-target":`#${e} > code`})}function Vr(e,t){let r=t&2,o=t&1,n=Object.keys(e.terms).filter(c=>!e.terms[c]).reduce((c,p)=>[...c,S("del",null,p)," "],[]).slice(0,-1),i=he(),s=new URL(e.location,i.base);G("search.highlight")&&s.searchParams.set("h",Object.entries(e.terms).filter(([,c])=>c).reduce((c,[p])=>`${c} ${p}`.trim(),""));let{tags:a}=he();return S("a",{href:`${s}`,class:"md-search-result__link",tabIndex:-1},S("article",{class:"md-search-result__article md-typeset","data-md-score":e.score.toFixed(2)},r>0&&S("div",{class:"md-search-result__icon md-icon"}),r>0&&S("h1",null,e.title),r<=0&&S("h2",null,e.title),o>0&&e.text.length>0&&e.text,e.tags&&e.tags.map(c=>{let p=a?c in a?`md-tag-icon md-tag--${a[c]}`:"md-tag-icon":"";return S("span",{class:`md-tag ${p}`},c)}),o>0&&n.length>0&&S("p",{class:"md-search-result__terms"},we("search.result.term.missing"),": ",...n)))}function hn(e){let t=e[0].score,r=[...e],o=he(),n=r.findIndex(l=>!`${new 
URL(l.location,o.base)}`.includes("#")),[i]=r.splice(n,1),s=r.findIndex(l=>l.scoreVr(l,1)),...c.length?[S("details",{class:"md-search-result__more"},S("summary",{tabIndex:-1},S("div",null,c.length>0&&c.length===1?we("search.result.more.one"):we("search.result.more.other",c.length))),...c.map(l=>Vr(l,1)))]:[]];return S("li",{class:"md-search-result__item"},p)}function bn(e){return S("ul",{class:"md-source__facts"},Object.entries(e).map(([t,r])=>S("li",{class:`md-source__fact md-source__fact--${t}`},typeof r=="number"?ar(r):r)))}function zr(e){let t=`tabbed-control tabbed-control--${e}`;return S("div",{class:t,hidden:!0},S("button",{class:"tabbed-button",tabIndex:-1,"aria-hidden":"true"}))}function vn(e){return S("div",{class:"md-typeset__scrollwrap"},S("div",{class:"md-typeset__table"},e))}function xa(e){let t=he(),r=new URL(`../${e.version}/`,t.base);return S("li",{class:"md-version__item"},S("a",{href:`${r}`,class:"md-version__link"},e.title))}function gn(e,t){return S("div",{class:"md-version"},S("button",{class:"md-version__current","aria-label":we("select.version")},t.title),S("ul",{class:"md-version__list"},e.map(xa)))}var ya=0;function Ea(e,t){document.body.append(e);let{width:r}=le(e);e.style.setProperty("--md-tooltip-width",`${r}px`),e.remove();let o=sr(t),n=typeof o!="undefined"?et(o):R({x:0,y:0}),i=L(vt(t),qo(t)).pipe(X());return B([i,n]).pipe(m(([s,a])=>{let{x:c,y:p}=Ue(t),l=le(t),f=t.closest("table");return f&&t.parentElement&&(c+=f.offsetLeft+t.parentElement.offsetLeft,p+=f.offsetTop+t.parentElement.offsetTop),{active:s,offset:{x:c-a.x+l.width/2-r/2,y:p-a.y+l.height+8}}}))}function Be(e){let t=e.title;if(!t.length)return M;let r=`__tooltip_${ya++}`,o=Ct(r,"inline"),n=U(".md-typeset",o);return n.innerHTML=t,H(()=>{let i=new x;return 
i.subscribe({next({offset:s}){o.style.setProperty("--md-tooltip-x",`${s.x}px`),o.style.setProperty("--md-tooltip-y",`${s.y}px`)},complete(){o.style.removeProperty("--md-tooltip-x"),o.style.removeProperty("--md-tooltip-y")}}),L(i.pipe(v(({active:s})=>s)),i.pipe(ye(250),v(({active:s})=>!s))).subscribe({next({active:s}){s?(e.insertAdjacentElement("afterend",o),e.setAttribute("aria-describedby",r),e.removeAttribute("title")):(o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t))},complete(){o.remove(),e.removeAttribute("aria-describedby"),e.setAttribute("title",t)}}),i.pipe(Le(16,ge)).subscribe(({active:s})=>{o.classList.toggle("md-tooltip--active",s)}),i.pipe(_t(125,ge),v(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:s})=>s)).subscribe({next(s){s?o.style.setProperty("--md-tooltip-0",`${-s}px`):o.style.removeProperty("--md-tooltip-0")},complete(){o.style.removeProperty("--md-tooltip-0")}}),Ea(o,e).pipe(T(s=>i.next(s)),A(()=>i.complete()),m(s=>P({ref:e},s)))}).pipe(qe(ie))}function wa(e,t){let r=H(()=>B([Ko(e),et(t)])).pipe(m(([{x:o,y:n},i])=>{let{width:s,height:a}=le(e);return{x:o-i.x+s/2,y:n-i.y+a/2}}));return vt(e).pipe(w(o=>r.pipe(m(n=>({active:o,offset:n})),ue(+!o||1/0))))}function xn(e,t,{target$:r}){let[o,n]=Array.from(e.children);return H(()=>{let i=new x,s=i.pipe(ee(),oe(!0));return 
i.subscribe({next({offset:a}){e.style.setProperty("--md-tooltip-x",`${a.x}px`),e.style.setProperty("--md-tooltip-y",`${a.y}px`)},complete(){e.style.removeProperty("--md-tooltip-x"),e.style.removeProperty("--md-tooltip-y")}}),yt(e).pipe(j(s)).subscribe(a=>{e.toggleAttribute("data-md-visible",a)}),L(i.pipe(v(({active:a})=>a)),i.pipe(ye(250),v(({active:a})=>!a))).subscribe({next({active:a}){a?e.prepend(o):o.remove()},complete(){e.prepend(o)}}),i.pipe(Le(16,ge)).subscribe(({active:a})=>{o.classList.toggle("md-tooltip--active",a)}),i.pipe(_t(125,ge),v(()=>!!e.offsetParent),m(()=>e.offsetParent.getBoundingClientRect()),m(({x:a})=>a)).subscribe({next(a){a?e.style.setProperty("--md-tooltip-0",`${-a}px`):e.style.removeProperty("--md-tooltip-0")},complete(){e.style.removeProperty("--md-tooltip-0")}}),h(n,"click").pipe(j(s),v(a=>!(a.metaKey||a.ctrlKey))).subscribe(a=>{a.stopPropagation(),a.preventDefault()}),h(n,"mousedown").pipe(j(s),ae(i)).subscribe(([a,{active:c}])=>{var p;if(a.button!==0||a.metaKey||a.ctrlKey)a.preventDefault();else if(c){a.preventDefault();let l=e.parentElement.closest(".md-annotation");l instanceof HTMLElement?l.focus():(p=Ie())==null||p.blur()}}),r.pipe(j(s),v(a=>a===o),Qe(125)).subscribe(()=>e.focus()),wa(e,t).pipe(T(a=>i.next(a)),A(()=>i.complete()),m(a=>P({ref:e},a)))})}function Ta(e){return e.tagName==="CODE"?W(".c, .c1, .cm",e):[e]}function Sa(e){let t=[];for(let r of Ta(e)){let o=[],n=document.createNodeIterator(r,NodeFilter.SHOW_TEXT);for(let i=n.nextNode();i;i=n.nextNode())o.push(i);for(let i of o){let s;for(;s=/(\(\d+\))(!)?/.exec(i.textContent);){let[,a,c]=s;if(typeof c=="undefined"){let p=i.splitText(s.index);i=p.splitText(a.length),t.push(p)}else{i.textContent=a,t.push(i);break}}}}return t}function yn(e,t){t.append(...Array.from(e.childNodes))}function fr(e,t,{target$:r,print$:o}){let n=t.closest("[id]"),i=n==null?void 0:n.id,s=new Map;for(let a of Sa(t)){let[,c]=a.textContent.match(/\((\d+)\)/);ce(`:scope > 
li:nth-child(${c})`,e)&&(s.set(c,un(c,i)),a.replaceWith(s.get(c)))}return s.size===0?M:H(()=>{let a=new x,c=a.pipe(ee(),oe(!0)),p=[];for(let[l,f]of s)p.push([U(".md-typeset",f),U(`:scope > li:nth-child(${l})`,e)]);return o.pipe(j(c)).subscribe(l=>{e.hidden=!l,e.classList.toggle("md-annotation-list",l);for(let[f,u]of p)l?yn(f,u):yn(u,f)}),L(...[...s].map(([,l])=>xn(l,t,{target$:r}))).pipe(A(()=>a.complete()),de())})}function En(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return En(t)}}function wn(e,t){return H(()=>{let r=En(e);return typeof r!="undefined"?fr(r,e,t):M})}var Tn=jt(Kr());var Oa=0;function Sn(e){if(e.nextElementSibling){let t=e.nextElementSibling;if(t.tagName==="OL")return t;if(t.tagName==="P"&&!t.children.length)return Sn(t)}}function Ma(e){return Se(e).pipe(m(({width:t})=>({scrollable:xt(e).width>t})),te("scrollable"))}function On(e,t){let{matches:r}=matchMedia("(hover)"),o=H(()=>{let n=new x,i=n.pipe(Rr(1));n.subscribe(({scrollable:c})=>{c&&r?e.setAttribute("tabindex","0"):e.removeAttribute("tabindex")});let s=[];if(Tn.default.isSupported()&&(e.closest(".copy")||G("content.code.copy")&&!e.closest(".no-copy"))){let c=e.closest("pre");c.id=`__code_${Oa++}`;let p=dn(c.id);c.insertBefore(p,e),G("content.tooltips")&&s.push(Be(p))}let a=e.closest(".highlight");if(a instanceof HTMLElement){let c=Sn(a);if(typeof c!="undefined"&&(a.classList.contains("annotate")||G("content.code.annotate"))){let p=fr(c,e,t);s.push(Se(a).pipe(j(i),m(({width:l,height:f})=>l&&f),X(),w(l=>l?p:M)))}}return Ma(e).pipe(T(c=>n.next(c)),A(()=>n.complete()),m(c=>P({ref:e},c)),Re(...s))});return G("content.lazy")?yt(e).pipe(v(n=>n),ue(1),w(()=>o)):o}function La(e,{target$:t,print$:r}){let o=!0;return L(t.pipe(m(n=>n.closest("details:not([open])")),v(n=>e===n),m(()=>({action:"open",reveal:!0}))),r.pipe(v(n=>n||!o),T(()=>o=e.open),m(n=>({action:n?"open":"close"}))))}function Mn(e,t){return H(()=>{let r=new 
x;return r.subscribe(({action:o,reveal:n})=>{e.toggleAttribute("open",o==="open"),n&&e.scrollIntoView()}),La(e,t).pipe(T(o=>r.next(o)),A(()=>r.complete()),m(o=>P({ref:e},o)))})}var Ln=".node circle,.node ellipse,.node path,.node polygon,.node rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}marker{fill:var(--md-mermaid-edge-color)!important}.edgeLabel .label rect{fill:#0000}.label{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.label foreignObject{line-height:normal;overflow:visible}.label div .edgeLabel{color:var(--md-mermaid-label-fg-color)}.edgeLabel,.edgeLabel rect,.label div .edgeLabel{background-color:var(--md-mermaid-label-bg-color)}.edgeLabel,.edgeLabel rect{fill:var(--md-mermaid-label-bg-color);color:var(--md-mermaid-edge-color)}.edgePath .path,.flowchart-link{stroke:var(--md-mermaid-edge-color);stroke-width:.05rem}.edgePath .arrowheadPath{fill:var(--md-mermaid-edge-color);stroke:none}.cluster rect{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}.cluster span{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}g #flowchart-circleEnd,g #flowchart-circleStart,g #flowchart-crossEnd,g #flowchart-crossStart,g #flowchart-pointEnd,g #flowchart-pointStart{stroke:none}g.classGroup line,g.classGroup rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.classGroup text{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.classLabel .box{fill:var(--md-mermaid-label-bg-color);background-color:var(--md-mermaid-label-bg-color);opacity:1}.classLabel .label{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.node .divider{stroke:var(--md-mermaid-node-fg-color)}.relation{stroke:var(--md-mermaid-edge-color)}.cardinality{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.cardinality text{fill:inherit!important}defs 
#classDiagram-compositionEnd,defs #classDiagram-compositionStart,defs #classDiagram-dependencyEnd,defs #classDiagram-dependencyStart,defs #classDiagram-extensionEnd,defs #classDiagram-extensionStart{fill:var(--md-mermaid-edge-color)!important;stroke:var(--md-mermaid-edge-color)!important}defs #classDiagram-aggregationEnd,defs #classDiagram-aggregationStart{fill:var(--md-mermaid-label-bg-color)!important;stroke:var(--md-mermaid-edge-color)!important}g.stateGroup rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}g.stateGroup .state-title{fill:var(--md-mermaid-label-fg-color)!important;font-family:var(--md-mermaid-font-family)}g.stateGroup .composit{fill:var(--md-mermaid-label-bg-color)}.nodeLabel{color:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.node circle.state-end,.node circle.state-start,.start-state{fill:var(--md-mermaid-edge-color);stroke:none}.end-state-inner,.end-state-outer{fill:var(--md-mermaid-edge-color)}.end-state-inner,.node circle.state-end{stroke:var(--md-mermaid-label-bg-color)}.transition{stroke:var(--md-mermaid-edge-color)}[id^=state-fork] rect,[id^=state-join] rect{fill:var(--md-mermaid-edge-color)!important;stroke:none!important}.statediagram-cluster.statediagram-cluster .inner{fill:var(--md-default-bg-color)}.statediagram-cluster rect{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.statediagram-state rect.divider{fill:var(--md-default-fg-color--lightest);stroke:var(--md-default-fg-color--lighter)}defs 
#statediagram-barbEnd{stroke:var(--md-mermaid-edge-color)}.attributeBoxEven,.attributeBoxOdd{fill:var(--md-mermaid-node-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityBox{fill:var(--md-mermaid-label-bg-color);stroke:var(--md-mermaid-node-fg-color)}.entityLabel{fill:var(--md-mermaid-label-fg-color);font-family:var(--md-mermaid-font-family)}.relationshipLabelBox{fill:var(--md-mermaid-label-bg-color);fill-opacity:1;background-color:var(--md-mermaid-label-bg-color);opacity:1}.relationshipLabel{fill:var(--md-mermaid-label-fg-color)}.relationshipLine{stroke:var(--md-mermaid-edge-color)}defs #ONE_OR_MORE_END *,defs #ONE_OR_MORE_START *,defs #ONLY_ONE_END *,defs #ONLY_ONE_START *,defs #ZERO_OR_MORE_END *,defs #ZERO_OR_MORE_START *,defs #ZERO_OR_ONE_END *,defs #ZERO_OR_ONE_START *{stroke:var(--md-mermaid-edge-color)!important}defs #ZERO_OR_MORE_END circle,defs #ZERO_OR_MORE_START circle{fill:var(--md-mermaid-label-bg-color)}.actor{fill:var(--md-mermaid-sequence-actor-bg-color);stroke:var(--md-mermaid-sequence-actor-border-color)}text.actor>tspan{fill:var(--md-mermaid-sequence-actor-fg-color);font-family:var(--md-mermaid-font-family)}line{stroke:var(--md-mermaid-sequence-actor-line-color)}.actor-man circle,.actor-man line{fill:var(--md-mermaid-sequence-actorman-bg-color);stroke:var(--md-mermaid-sequence-actorman-line-color)}.messageLine0,.messageLine1{stroke:var(--md-mermaid-sequence-message-line-color)}.note{fill:var(--md-mermaid-sequence-note-bg-color);stroke:var(--md-mermaid-sequence-note-border-color)}.loopText,.loopText>tspan,.messageText,.noteText>tspan{stroke:none;font-family:var(--md-mermaid-font-family)!important}.messageText{fill:var(--md-mermaid-sequence-message-fg-color)}.loopText,.loopText>tspan{fill:var(--md-mermaid-sequence-loop-fg-color)}.noteText>tspan{fill:var(--md-mermaid-sequence-note-fg-color)}#arrowhead 
path{fill:var(--md-mermaid-sequence-message-line-color);stroke:none}.loopLine{fill:var(--md-mermaid-sequence-loop-bg-color);stroke:var(--md-mermaid-sequence-loop-border-color)}.labelBox{fill:var(--md-mermaid-sequence-label-bg-color);stroke:none}.labelText,.labelText>span{fill:var(--md-mermaid-sequence-label-fg-color);font-family:var(--md-mermaid-font-family)}.sequenceNumber{fill:var(--md-mermaid-sequence-number-fg-color)}rect.rect{fill:var(--md-mermaid-sequence-box-bg-color);stroke:none}rect.rect+text.text{fill:var(--md-mermaid-sequence-box-fg-color)}defs #sequencenumber{fill:var(--md-mermaid-sequence-number-bg-color)!important}";var Qr,Aa=0;function Ca(){return typeof mermaid=="undefined"||mermaid instanceof Element?gt("https://unpkg.com/mermaid@10.6.1/dist/mermaid.min.js"):R(void 0)}function _n(e){return e.classList.remove("mermaid"),Qr||(Qr=Ca().pipe(T(()=>mermaid.initialize({startOnLoad:!1,themeCSS:Ln,sequence:{actorFontSize:"16px",messageFontSize:"16px",noteFontSize:"16px"}})),m(()=>{}),Z(1))),Qr.subscribe(()=>no(this,null,function*(){e.classList.add("mermaid");let t=`__mermaid_${Aa++}`,r=S("div",{class:"mermaid"}),o=e.textContent,{svg:n,fn:i}=yield mermaid.render(t,o),s=r.attachShadow({mode:"closed"});s.innerHTML=n,e.replaceWith(r),i==null||i(s)})),Qr.pipe(m(()=>({ref:e})))}var An=S("table");function Cn(e){return e.replaceWith(An),An.replaceWith(vn(e)),R({ref:e})}function ka(e){let t=e.find(r=>r.checked)||e[0];return L(...e.map(r=>h(r,"change").pipe(m(()=>U(`label[for="${r.id}"]`))))).pipe(q(U(`label[for="${t.id}"]`)),m(r=>({active:r})))}function kn(e,{viewport$:t,target$:r}){let o=U(".tabbed-labels",e),n=W(":scope > input",e),i=zr("prev");e.append(i);let s=zr("next");return e.append(s),H(()=>{let a=new x,c=a.pipe(ee(),oe(!0));B([a,Se(e)]).pipe(j(c),Le(1,ge)).subscribe({next([{active:p},l]){let f=Ue(p),{width:u}=le(p);e.style.setProperty("--md-indicator-x",`${f.x}px`),e.style.setProperty("--md-indicator-width",`${u}px`);let 
d=ir(o);(f.xd.x+l.width)&&o.scrollTo({left:Math.max(0,f.x-16),behavior:"smooth"})},complete(){e.style.removeProperty("--md-indicator-x"),e.style.removeProperty("--md-indicator-width")}}),B([et(o),Se(o)]).pipe(j(c)).subscribe(([p,l])=>{let f=xt(o);i.hidden=p.x<16,s.hidden=p.x>f.width-l.width-16}),L(h(i,"click").pipe(m(()=>-1)),h(s,"click").pipe(m(()=>1))).pipe(j(c)).subscribe(p=>{let{width:l}=le(o);o.scrollBy({left:l*p,behavior:"smooth"})}),r.pipe(j(c),v(p=>n.includes(p))).subscribe(p=>p.click()),o.classList.add("tabbed-labels--linked");for(let p of n){let l=U(`label[for="${p.id}"]`);l.replaceChildren(S("a",{href:`#${l.htmlFor}`,tabIndex:-1},...Array.from(l.childNodes))),h(l.firstElementChild,"click").pipe(j(c),v(f=>!(f.metaKey||f.ctrlKey)),T(f=>{f.preventDefault(),f.stopPropagation()})).subscribe(()=>{history.replaceState({},"",`#${l.htmlFor}`),l.click()})}return G("content.tabs.link")&&a.pipe(Ee(1),ae(t)).subscribe(([{active:p},{offset:l}])=>{let f=p.innerText.trim();if(p.hasAttribute("data-md-switching"))p.removeAttribute("data-md-switching");else{let u=e.offsetTop-l.y;for(let y of W("[data-tabs]"))for(let b of W(":scope > input",y)){let D=U(`label[for="${b.id}"]`);if(D!==p&&D.innerText.trim()===f){D.setAttribute("data-md-switching",""),b.click();break}}window.scrollTo({top:e.offsetTop-u});let d=__md_get("__tabs")||[];__md_set("__tabs",[...new Set([f,...d])])}}),a.pipe(j(c)).subscribe(()=>{for(let p of W("audio, video",e))p.pause()}),ka(n).pipe(T(p=>a.next(p)),A(()=>a.complete()),m(p=>P({ref:e},p)))}).pipe(qe(ie))}function Hn(e,{viewport$:t,target$:r,print$:o}){return L(...W(".annotate:not(.highlight)",e).map(n=>wn(n,{target$:r,print$:o})),...W("pre:not(.mermaid) > 
code",e).map(n=>On(n,{target$:r,print$:o})),...W("pre.mermaid",e).map(n=>_n(n)),...W("table:not([class])",e).map(n=>Cn(n)),...W("details",e).map(n=>Mn(n,{target$:r,print$:o})),...W("[data-tabs]",e).map(n=>kn(n,{viewport$:t,target$:r})),...W("[title]",e).filter(()=>G("content.tooltips")).map(n=>Be(n)))}function Ha(e,{alert$:t}){return t.pipe(w(r=>L(R(!0),R(!1).pipe(Qe(2e3))).pipe(m(o=>({message:r,active:o})))))}function $n(e,t){let r=U(".md-typeset",e);return H(()=>{let o=new x;return o.subscribe(({message:n,active:i})=>{e.classList.toggle("md-dialog--active",i),r.textContent=n}),Ha(e,t).pipe(T(n=>o.next(n)),A(()=>o.complete()),m(n=>P({ref:e},n)))})}function $a({viewport$:e}){if(!G("header.autohide"))return R(!1);let t=e.pipe(m(({offset:{y:n}})=>n),Ce(2,1),m(([n,i])=>[nMath.abs(i-n.y)>100),m(([,[n]])=>n),X()),o=Ne("search");return B([e,o]).pipe(m(([{offset:n},i])=>n.y>400&&!i),X(),w(n=>n?r:R(!1)),q(!1))}function Pn(e,t){return H(()=>B([Se(e),$a(t)])).pipe(m(([{height:r},o])=>({height:r,hidden:o})),X((r,o)=>r.height===o.height&&r.hidden===o.hidden),Z(1))}function Rn(e,{header$:t,main$:r}){return H(()=>{let o=new x,n=o.pipe(ee(),oe(!0));o.pipe(te("active"),Ze(t)).subscribe(([{active:s},{hidden:a}])=>{e.classList.toggle("md-header--shadow",s&&!a),e.hidden=a});let i=fe(W("[title]",e)).pipe(v(()=>G("content.tooltips")),re(s=>Be(s)));return r.subscribe(o),t.pipe(j(n),m(s=>P({ref:e},s)),Re(i.pipe(j(n))))})}function Pa(e,{viewport$:t,header$:r}){return mr(e,{viewport$:t,header$:r}).pipe(m(({offset:{y:o}})=>{let{height:n}=le(e);return{active:o>=n}}),te("active"))}function In(e,t){return H(()=>{let r=new x;r.subscribe({next({active:n}){e.classList.toggle("md-header__title--active",n)},complete(){e.classList.remove("md-header__title--active")}});let o=ce(".md-content h1");return typeof o=="undefined"?M:Pa(o,t).pipe(T(n=>r.next(n)),A(()=>r.complete()),m(n=>P({ref:e},n)))})}function Fn(e,{viewport$:t,header$:r}){let 
o=r.pipe(m(({height:i})=>i),X()),n=o.pipe(w(()=>Se(e).pipe(m(({height:i})=>({top:e.offsetTop,bottom:e.offsetTop+i})),te("bottom"))));return B([o,n,t]).pipe(m(([i,{top:s,bottom:a},{offset:{y:c},size:{height:p}}])=>(p=Math.max(0,p-Math.max(0,s-c,i)-Math.max(0,p+c-a)),{offset:s-i,height:p,active:s-i<=c})),X((i,s)=>i.offset===s.offset&&i.height===s.height&&i.active===s.active))}function Ra(e){let t=__md_get("__palette")||{index:e.findIndex(r=>matchMedia(r.getAttribute("data-md-color-media")).matches)};return R(...e).pipe(re(r=>h(r,"change").pipe(m(()=>r))),q(e[Math.max(0,t.index)]),m(r=>({index:e.indexOf(r),color:{media:r.getAttribute("data-md-color-media"),scheme:r.getAttribute("data-md-color-scheme"),primary:r.getAttribute("data-md-color-primary"),accent:r.getAttribute("data-md-color-accent")}})),Z(1))}function jn(e){let t=W("input",e),r=S("meta",{name:"theme-color"});document.head.appendChild(r);let o=S("meta",{name:"color-scheme"});document.head.appendChild(o);let n=At("(prefers-color-scheme: light)");return H(()=>{let i=new x;return i.subscribe(s=>{if(document.body.setAttribute("data-md-color-switching",""),s.color.media==="(prefers-color-scheme)"){let a=matchMedia("(prefers-color-scheme: light)"),c=document.querySelector(a.matches?"[data-md-color-media='(prefers-color-scheme: light)']":"[data-md-color-media='(prefers-color-scheme: dark)']");s.color.scheme=c.getAttribute("data-md-color-scheme"),s.color.primary=c.getAttribute("data-md-color-primary"),s.color.accent=c.getAttribute("data-md-color-accent")}for(let[a,c]of Object.entries(s.color))document.body.setAttribute(`data-md-color-${a}`,c);for(let a=0;a{let s=Oe("header"),a=window.getComputedStyle(s);return 
o.content=a.colorScheme,a.backgroundColor.match(/\d+/g).map(c=>(+c).toString(16).padStart(2,"0")).join("")})).subscribe(s=>r.content=`#${s}`),i.pipe(Me(ie)).subscribe(()=>{document.body.removeAttribute("data-md-color-switching")}),Ra(t).pipe(j(n.pipe(Ee(1))),at(),T(s=>i.next(s)),A(()=>i.complete()),m(s=>P({ref:e},s)))})}function Wn(e,{progress$:t}){return H(()=>{let r=new x;return r.subscribe(({value:o})=>{e.style.setProperty("--md-progress-value",`${o}`)}),t.pipe(T(o=>r.next({value:o})),A(()=>r.complete()),m(o=>({ref:e,value:o})))})}var Yr=jt(Kr());function Ia(e){e.setAttribute("data-md-copying","");let t=e.closest("[data-copy]"),r=t?t.getAttribute("data-copy"):e.innerText;return e.removeAttribute("data-md-copying"),r.trimEnd()}function Un({alert$:e}){Yr.default.isSupported()&&new I(t=>{new Yr.default("[data-clipboard-target], [data-clipboard-text]",{text:r=>r.getAttribute("data-clipboard-text")||Ia(U(r.getAttribute("data-clipboard-target")))}).on("success",r=>t.next(r))}).pipe(T(t=>{t.trigger.focus()}),m(()=>we("clipboard.copied"))).subscribe(e)}function Fa(e){if(e.length<2)return[""];let[t,r]=[...e].sort((n,i)=>n.length-i.length).map(n=>n.replace(/[^/]+$/,"")),o=0;if(t===r)o=t.length;else for(;t.charCodeAt(o)===r.charCodeAt(o);)o++;return e.map(n=>n.replace(t.slice(0,o),""))}function ur(e){let t=__md_get("__sitemap",sessionStorage,e);if(t)return R(t);{let r=he();return on(new URL("sitemap.xml",e||r.base)).pipe(m(o=>Fa(W("loc",o).map(n=>n.textContent))),xe(()=>M),$e([]),T(o=>__md_set("__sitemap",o,sessionStorage,e)))}}function Nn(e){let t=ce("[rel=canonical]",e);typeof t!="undefined"&&(t.href=t.href.replace("//localhost:","//127.0.0.1:"));let r=new Map;for(let o of W(":scope > *",e)){let n=o.outerHTML;for(let i of["href","src"]){let s=o.getAttribute(i);if(s===null)continue;let a=new URL(s,t==null?void 0:t.href),c=o.cloneNode();c.setAttribute(i,`${a}`),n=c.outerHTML;break}r.set(n,o)}return r}function Dn({location$:e,viewport$:t,progress$:r}){let 
o=he();if(location.protocol==="file:")return M;let n=ur().pipe(m(l=>l.map(f=>`${new URL(f,o.base)}`))),i=h(document.body,"click").pipe(ae(n),w(([l,f])=>{if(!(l.target instanceof Element))return M;let u=l.target.closest("a");if(u===null)return M;if(u.target||l.metaKey||l.ctrlKey)return M;let d=new URL(u.href);return d.search=d.hash="",f.includes(`${d}`)?(l.preventDefault(),R(new URL(u.href))):M}),de());i.pipe(ue(1)).subscribe(()=>{let l=ce("link[rel=icon]");typeof l!="undefined"&&(l.href=l.href)}),h(window,"beforeunload").subscribe(()=>{history.scrollRestoration="auto"}),i.pipe(ae(t)).subscribe(([l,{offset:f}])=>{history.scrollRestoration="manual",history.replaceState(f,""),history.pushState(null,"",l)}),i.subscribe(e);let s=e.pipe(q(me()),te("pathname"),Ee(1),w(l=>lr(l,{progress$:r}).pipe(xe(()=>(st(l,!0),M))))),a=new DOMParser,c=s.pipe(w(l=>l.text()),w(l=>{let f=a.parseFromString(l,"text/html");for(let b of["[data-md-component=announce]","[data-md-component=container]","[data-md-component=header-topic]","[data-md-component=outdated]","[data-md-component=logo]","[data-md-component=skip]",...G("navigation.tabs.sticky")?["[data-md-component=tabs]"]:[]]){let D=ce(b),Q=ce(b,f);typeof D!="undefined"&&typeof Q!="undefined"&&D.replaceWith(Q)}let u=Nn(document.head),d=Nn(f.head);for(let[b,D]of d)D.getAttribute("rel")==="stylesheet"||D.hasAttribute("src")||(u.has(b)?u.delete(b):document.head.appendChild(D));for(let b of u.values())b.getAttribute("rel")==="stylesheet"||b.hasAttribute("src")||b.remove();let y=Oe("container");return We(W("script",y)).pipe(w(b=>{let D=f.createElement("script");if(b.src){for(let Q of b.getAttributeNames())D.setAttribute(Q,b.getAttribute(Q));return b.replaceWith(D),new I(Q=>{D.onload=()=>Q.complete()})}else return D.textContent=b.textContent,b.replaceWith(D),M}),ee(),oe(f))}),de());return h(window,"popstate").pipe(m(me)).subscribe(e),e.pipe(q(me()),Ce(2,1),v(([l,f])=>l.pathname===f.pathname&&l.hash!==f.hash),m(([,l])=>l)).subscribe(l=>{var 
f,u;history.state!==null||!l.hash?window.scrollTo(0,(u=(f=history.state)==null?void 0:f.y)!=null?u:0):(history.scrollRestoration="auto",pr(l.hash),history.scrollRestoration="manual")}),e.pipe(Ir(i),q(me()),Ce(2,1),v(([l,f])=>l.pathname===f.pathname&&l.hash===f.hash),m(([,l])=>l)).subscribe(l=>{history.scrollRestoration="auto",pr(l.hash),history.scrollRestoration="manual",history.back()}),c.pipe(ae(e)).subscribe(([,l])=>{var f,u;history.state!==null||!l.hash?window.scrollTo(0,(u=(f=history.state)==null?void 0:f.y)!=null?u:0):pr(l.hash)}),t.pipe(te("offset"),ye(100)).subscribe(({offset:l})=>{history.replaceState(l,"")}),c}var qn=jt(zn());function Kn(e){let t=e.separator.split("|").map(n=>n.replace(/(\(\?[!=<][^)]+\))/g,"").length===0?"\uFFFD":n).join("|"),r=new RegExp(t,"img"),o=(n,i,s)=>`${i}${s}`;return n=>{n=n.replace(/[\s*+\-:~^]+/g," ").trim();let i=new RegExp(`(^|${e.separator}|)(${n.replace(/[|\\{}()[\]^$+*?.-]/g,"\\$&").replace(r,"|")})`,"img");return s=>(0,qn.default)(s).replace(i,o).replace(/<\/mark>(\s+)]*>/img,"$1")}}function Ht(e){return e.type===1}function dr(e){return e.type===3}function Qn(e,t){let r=ln(e);return L(R(location.protocol!=="file:"),Ne("search")).pipe(Pe(o=>o),w(()=>t)).subscribe(({config:o,docs:n})=>r.next({type:0,data:{config:o,docs:n,options:{suggest:G("search.suggest")}}})),r}function Yn({document$:e}){let t=he(),r=De(new URL("../versions.json",t.base)).pipe(xe(()=>M)),o=r.pipe(m(n=>{let[,i]=t.base.match(/([^/]+)\/?$/);return n.find(({version:s,aliases:a})=>s===i||a.includes(i))||n[0]}));r.pipe(m(n=>new Map(n.map(i=>[`${new URL(`../${i.version}/`,t.base)}`,i]))),w(n=>h(document.body,"click").pipe(v(i=>!i.metaKey&&!i.ctrlKey),ae(o),w(([i,s])=>{if(i.target instanceof Element){let a=i.target.closest("a");if(a&&!a.target&&n.has(a.href)){let c=a.href;return!i.target.closest(".md-version")&&n.get(c)===s?M:(i.preventDefault(),R(c))}}return M}),w(i=>{let{version:s}=n.get(i);return ur(new URL(i)).pipe(m(a=>{let 
p=me().href.replace(t.base,"");return a.includes(p.split("#")[0])?new URL(`../${s}/${p}`,t.base):new URL(i)}))})))).subscribe(n=>st(n,!0)),B([r,o]).subscribe(([n,i])=>{U(".md-header__topic").appendChild(gn(n,i))}),e.pipe(w(()=>o)).subscribe(n=>{var s;let i=__md_get("__outdated",sessionStorage);if(i===null){i=!0;let a=((s=t.version)==null?void 0:s.default)||"latest";Array.isArray(a)||(a=[a]);e:for(let c of a)for(let p of n.aliases.concat(n.version))if(new RegExp(c,"i").test(p)){i=!1;break e}__md_set("__outdated",i,sessionStorage)}if(i)for(let a of ne("outdated"))a.hidden=!1})}function Da(e,{worker$:t}){let{searchParams:r}=me();r.has("q")&&(Ye("search",!0),e.value=r.get("q"),e.focus(),Ne("search").pipe(Pe(i=>!i)).subscribe(()=>{let i=me();i.searchParams.delete("q"),history.replaceState({},"",`${i}`)}));let o=vt(e),n=L(t.pipe(Pe(Ht)),h(e,"keyup"),o).pipe(m(()=>e.value),X());return B([n,o]).pipe(m(([i,s])=>({value:i,focus:s})),Z(1))}function Bn(e,{worker$:t}){let r=new x,o=r.pipe(ee(),oe(!0));B([t.pipe(Pe(Ht)),r],(i,s)=>s).pipe(te("value")).subscribe(({value:i})=>t.next({type:2,data:i})),r.pipe(te("focus")).subscribe(({focus:i})=>{i&&Ye("search",i)}),h(e.form,"reset").pipe(j(o)).subscribe(()=>e.focus());let n=U("header [for=__search]");return h(n,"click").subscribe(()=>e.focus()),Da(e,{worker$:t}).pipe(T(i=>r.next(i)),A(()=>r.complete()),m(i=>P({ref:e},i)),Z(1))}function Gn(e,{worker$:t,query$:r}){let o=new x,n=Go(e.parentElement).pipe(v(Boolean)),i=e.parentElement,s=U(":scope > :first-child",e),a=U(":scope > :last-child",e);Ne("search").subscribe(l=>a.setAttribute("role",l?"list":"presentation")),o.pipe(ae(r),Wr(t.pipe(Pe(Ht)))).subscribe(([{items:l},{value:f}])=>{switch(l.length){case 0:s.textContent=f.length?we("search.result.none"):we("search.result.placeholder");break;case 1:s.textContent=we("search.result.one");break;default:let u=ar(l.length);s.textContent=we("search.result.other",u)}});let 
c=o.pipe(T(()=>a.innerHTML=""),w(({items:l})=>L(R(...l.slice(0,10)),R(...l.slice(10)).pipe(Ce(4),Nr(n),w(([f])=>f)))),m(hn),de());return c.subscribe(l=>a.appendChild(l)),c.pipe(re(l=>{let f=ce("details",l);return typeof f=="undefined"?M:h(f,"toggle").pipe(j(o),m(()=>f))})).subscribe(l=>{l.open===!1&&l.offsetTop<=i.scrollTop&&i.scrollTo({top:l.offsetTop})}),t.pipe(v(dr),m(({data:l})=>l)).pipe(T(l=>o.next(l)),A(()=>o.complete()),m(l=>P({ref:e},l)))}function Va(e,{query$:t}){return t.pipe(m(({value:r})=>{let o=me();return o.hash="",r=r.replace(/\s+/g,"+").replace(/&/g,"%26").replace(/=/g,"%3D"),o.search=`q=${r}`,{url:o}}))}function Jn(e,t){let r=new x,o=r.pipe(ee(),oe(!0));return r.subscribe(({url:n})=>{e.setAttribute("data-clipboard-text",e.href),e.href=`${n}`}),h(e,"click").pipe(j(o)).subscribe(n=>n.preventDefault()),Va(e,t).pipe(T(n=>r.next(n)),A(()=>r.complete()),m(n=>P({ref:e},n)))}function Xn(e,{worker$:t,keyboard$:r}){let o=new x,n=Oe("search-query"),i=L(h(n,"keydown"),h(n,"focus")).pipe(Me(ie),m(()=>n.value),X());return o.pipe(Ze(i),m(([{suggest:a},c])=>{let p=c.split(/([\s-]+)/);if(a!=null&&a.length&&p[p.length-1]){let l=a[a.length-1];l.startsWith(p[p.length-1])&&(p[p.length-1]=l)}else p.length=0;return p})).subscribe(a=>e.innerHTML=a.join("").replace(/\s/g," ")),r.pipe(v(({mode:a})=>a==="search")).subscribe(a=>{switch(a.type){case"ArrowRight":e.innerText.length&&n.selectionStart===n.value.length&&(n.value=e.innerText);break}}),t.pipe(v(dr),m(({data:a})=>a)).pipe(T(a=>o.next(a)),A(()=>o.complete()),m(()=>({ref:e})))}function Zn(e,{index$:t,keyboard$:r}){let o=he();try{let n=Qn(o.search,t),i=Oe("search-query",e),s=Oe("search-result",e);h(e,"click").pipe(v(({target:c})=>c instanceof Element&&!!c.closest("a"))).subscribe(()=>Ye("search",!1)),r.pipe(v(({mode:c})=>c==="search")).subscribe(c=>{let p=Ie();switch(c.type){case"Enter":if(p===i){let l=new Map;for(let f of W(":first-child [href]",s)){let 
u=f.firstElementChild;l.set(f,parseFloat(u.getAttribute("data-md-score")))}if(l.size){let[[f]]=[...l].sort(([,u],[,d])=>d-u);f.click()}c.claim()}break;case"Escape":case"Tab":Ye("search",!1),i.blur();break;case"ArrowUp":case"ArrowDown":if(typeof p=="undefined")i.focus();else{let l=[i,...W(":not(details) > [href], summary, details[open] [href]",s)],f=Math.max(0,(Math.max(0,l.indexOf(p))+l.length+(c.type==="ArrowUp"?-1:1))%l.length);l[f].focus()}c.claim();break;default:i!==Ie()&&i.focus()}}),r.pipe(v(({mode:c})=>c==="global")).subscribe(c=>{switch(c.type){case"f":case"s":case"/":i.focus(),i.select(),c.claim();break}});let a=Bn(i,{worker$:n});return L(a,Gn(s,{worker$:n,query$:a})).pipe(Re(...ne("search-share",e).map(c=>Jn(c,{query$:a})),...ne("search-suggest",e).map(c=>Xn(c,{worker$:n,keyboard$:r}))))}catch(n){return e.hidden=!0,Ke}}function ei(e,{index$:t,location$:r}){return B([t,r.pipe(q(me()),v(o=>!!o.searchParams.get("h")))]).pipe(m(([o,n])=>Kn(o.config)(n.searchParams.get("h"))),m(o=>{var s;let n=new Map,i=document.createNodeIterator(e,NodeFilter.SHOW_TEXT);for(let a=i.nextNode();a;a=i.nextNode())if((s=a.parentElement)!=null&&s.offsetHeight){let c=a.textContent,p=o(c);p.length>c.length&&n.set(a,p)}for(let[a,c]of n){let{childNodes:p}=S("span",null,c);a.replaceWith(...Array.from(p))}return{ref:e,nodes:n}}))}function za(e,{viewport$:t,main$:r}){let o=e.closest(".md-grid"),n=o.offsetTop-o.parentElement.offsetTop;return B([r,t]).pipe(m(([{offset:i,height:s},{offset:{y:a}}])=>(s=s+Math.min(n,Math.max(0,a-i))-n,{height:s,locked:a>=i+n})),X((i,s)=>i.height===s.height&&i.locked===s.locked))}function Br(e,o){var n=o,{header$:t}=n,r=oo(n,["header$"]);let i=U(".md-sidebar__scrollwrap",e),{y:s}=Ue(i);return H(()=>{let a=new x,c=a.pipe(ee(),oe(!0)),p=a.pipe(Le(0,ge));return p.pipe(ae(t)).subscribe({next([{height:l},{height:f}]){i.style.height=`${l-2*s}px`,e.style.top=`${f}px`},complete(){i.style.height="",e.style.top=""}}),p.pipe(Pe()).subscribe(()=>{for(let l of 
W(".md-nav__link--active[href]",e)){if(!l.clientHeight)continue;let f=l.closest(".md-sidebar__scrollwrap");if(typeof f!="undefined"){let u=l.offsetTop-f.offsetTop,{height:d}=le(f);f.scrollTo({top:u-d/2})}}}),fe(W("label[tabindex]",e)).pipe(re(l=>h(l,"click").pipe(Me(ie),m(()=>l),j(c)))).subscribe(l=>{let f=U(`[id="${l.htmlFor}"]`);U(`[aria-labelledby="${l.id}"]`).setAttribute("aria-expanded",`${f.checked}`)}),za(e,r).pipe(T(l=>a.next(l)),A(()=>a.complete()),m(l=>P({ref:e},l)))})}function ti(e,t){if(typeof t!="undefined"){let r=`https://api.github.com/repos/${e}/${t}`;return Lt(De(`${r}/releases/latest`).pipe(xe(()=>M),m(o=>({version:o.tag_name})),$e({})),De(r).pipe(xe(()=>M),m(o=>({stars:o.stargazers_count,forks:o.forks_count})),$e({}))).pipe(m(([o,n])=>P(P({},o),n)))}else{let r=`https://api.github.com/users/${e}`;return De(r).pipe(m(o=>({repositories:o.public_repos})),$e({}))}}function ri(e,t){let r=`https://${e}/api/v4/projects/${encodeURIComponent(t)}`;return De(r).pipe(xe(()=>M),m(({star_count:o,forks_count:n})=>({stars:o,forks:n})),$e({}))}function oi(e){let t=e.match(/^.+github\.com\/([^/]+)\/?([^/]+)?/i);if(t){let[,r,o]=t;return ti(r,o)}if(t=e.match(/^.+?([^/]*gitlab[^/]+)\/(.+?)\/?$/i),t){let[,r,o]=t;return ri(r,o)}return M}var qa;function Ka(e){return qa||(qa=H(()=>{let t=__md_get("__source",sessionStorage);if(t)return R(t);if(ne("consent").length){let o=__md_get("__consent");if(!(o&&o.github))return M}return oi(e.href).pipe(T(o=>__md_set("__source",o,sessionStorage)))}).pipe(xe(()=>M),v(t=>Object.keys(t).length>0),m(t=>({facts:t})),Z(1)))}function ni(e){let t=U(":scope > :last-child",e);return H(()=>{let r=new x;return r.subscribe(({facts:o})=>{t.appendChild(bn(o)),t.classList.add("md-source__repository--active")}),Ka(e).pipe(T(o=>r.next(o)),A(()=>r.complete()),m(o=>P({ref:e},o)))})}function Qa(e,{viewport$:t,header$:r}){return Se(document.body).pipe(w(()=>mr(e,{header$:r,viewport$:t})),m(({offset:{y:o}})=>({hidden:o>=10})),te("hidden"))}function 
ii(e,t){return H(()=>{let r=new x;return r.subscribe({next({hidden:o}){e.hidden=o},complete(){e.hidden=!1}}),(G("navigation.tabs.sticky")?R({hidden:!1}):Qa(e,t)).pipe(T(o=>r.next(o)),A(()=>r.complete()),m(o=>P({ref:e},o)))})}function Ya(e,{viewport$:t,header$:r}){let o=new Map,n=W("[href^=\\#]",e);for(let a of n){let c=decodeURIComponent(a.hash.substring(1)),p=ce(`[id="${c}"]`);typeof p!="undefined"&&o.set(a,p)}let i=r.pipe(te("height"),m(({height:a})=>{let c=Oe("main"),p=U(":scope > :first-child",c);return a+.8*(p.offsetTop-c.offsetTop)}),de());return Se(document.body).pipe(te("height"),w(a=>H(()=>{let c=[];return R([...o].reduce((p,[l,f])=>{for(;c.length&&o.get(c[c.length-1]).tagName>=f.tagName;)c.pop();let u=f.offsetTop;for(;!u&&f.parentElement;)f=f.parentElement,u=f.offsetTop;let d=f.offsetParent;for(;d;d=d.offsetParent)u+=d.offsetTop;return p.set([...c=[...c,l]].reverse(),u)},new Map))}).pipe(m(c=>new Map([...c].sort(([,p],[,l])=>p-l))),Ze(i),w(([c,p])=>t.pipe(Fr(([l,f],{offset:{y:u},size:d})=>{let y=u+d.height>=Math.floor(a.height);for(;f.length;){let[,b]=f[0];if(b-p=u&&!y)f=[l.pop(),...f];else break}return[l,f]},[[],[...c]]),X((l,f)=>l[0]===f[0]&&l[1]===f[1])))))).pipe(m(([a,c])=>({prev:a.map(([p])=>p),next:c.map(([p])=>p)})),q({prev:[],next:[]}),Ce(2,1),m(([a,c])=>a.prev.length{let i=new x,s=i.pipe(ee(),oe(!0));if(i.subscribe(({prev:a,next:c})=>{for(let[p]of c)p.classList.remove("md-nav__link--passed"),p.classList.remove("md-nav__link--active");for(let[p,[l]]of a.entries())l.classList.add("md-nav__link--passed"),l.classList.toggle("md-nav__link--active",p===a.length-1)}),G("toc.follow")){let a=L(t.pipe(ye(1),m(()=>{})),t.pipe(ye(250),m(()=>"smooth")));i.pipe(v(({prev:c})=>c.length>0),Ze(o.pipe(Me(ie))),ae(a)).subscribe(([[{prev:c}],p])=>{let[l]=c[c.length-1];if(l.offsetHeight){let f=sr(l);if(typeof f!="undefined"){let u=l.offsetTop-f.offsetTop,{height:d}=le(f);f.scrollTo({top:u-d/2,behavior:p})}}})}return 
G("navigation.tracking")&&t.pipe(j(s),te("offset"),ye(250),Ee(1),j(n.pipe(Ee(1))),at({delay:250}),ae(i)).subscribe(([,{prev:a}])=>{let c=me(),p=a[a.length-1];if(p&&p.length){let[l]=p,{hash:f}=new URL(l.href);c.hash!==f&&(c.hash=f,history.replaceState({},"",`${c}`))}else c.hash="",history.replaceState({},"",`${c}`)}),Ya(e,{viewport$:t,header$:r}).pipe(T(a=>i.next(a)),A(()=>i.complete()),m(a=>P({ref:e},a)))})}function Ba(e,{viewport$:t,main$:r,target$:o}){let n=t.pipe(m(({offset:{y:s}})=>s),Ce(2,1),m(([s,a])=>s>a&&a>0),X()),i=r.pipe(m(({active:s})=>s));return B([i,n]).pipe(m(([s,a])=>!(s&&a)),X(),j(o.pipe(Ee(1))),oe(!0),at({delay:250}),m(s=>({hidden:s})))}function si(e,{viewport$:t,header$:r,main$:o,target$:n}){let i=new x,s=i.pipe(ee(),oe(!0));return i.subscribe({next({hidden:a}){e.hidden=a,a?(e.setAttribute("tabindex","-1"),e.blur()):e.removeAttribute("tabindex")},complete(){e.style.top="",e.hidden=!0,e.removeAttribute("tabindex")}}),r.pipe(j(s),te("height")).subscribe(({height:a})=>{e.style.top=`${a+16}px`}),h(e,"click").subscribe(a=>{a.preventDefault(),window.scrollTo({top:0})}),Ba(e,{viewport$:t,main$:o,target$:n}).pipe(T(a=>i.next(a)),A(()=>i.complete()),m(a=>P({ref:e},a)))}function ci({document$:e}){e.pipe(w(()=>W(".md-ellipsis")),re(t=>yt(t).pipe(j(e.pipe(Ee(1))),v(r=>r),m(()=>t),ue(1))),v(t=>t.offsetWidth{let r=t.innerText,o=t.closest("a")||t;return o.title=r,Be(o).pipe(j(e.pipe(Ee(1))),A(()=>o.removeAttribute("title")))})).subscribe(),e.pipe(w(()=>W(".md-status")),re(t=>Be(t))).subscribe()}function pi({document$:e,tablet$:t}){e.pipe(w(()=>W(".md-toggle--indeterminate")),T(r=>{r.indeterminate=!0,r.checked=!1}),re(r=>h(r,"change").pipe(Ur(()=>r.classList.contains("md-toggle--indeterminate")),m(()=>r))),ae(t)).subscribe(([r,o])=>{r.classList.remove("md-toggle--indeterminate"),o&&(r.checked=!1)})}function Ga(){return/(iPad|iPhone|iPod)/.test(navigator.userAgent)}function 
li({document$:e}){e.pipe(w(()=>W("[data-md-scrollfix]")),T(t=>t.removeAttribute("data-md-scrollfix")),v(Ga),re(t=>h(t,"touchstart").pipe(m(()=>t)))).subscribe(t=>{let r=t.scrollTop;r===0?t.scrollTop=1:r+t.offsetHeight===t.scrollHeight&&(t.scrollTop=r-1)})}function mi({viewport$:e,tablet$:t}){B([Ne("search"),t]).pipe(m(([r,o])=>r&&!o),w(r=>R(r).pipe(Qe(r?400:100))),ae(e)).subscribe(([r,{offset:{y:o}}])=>{if(r)document.body.setAttribute("data-md-scrolllock",""),document.body.style.top=`-${o}px`;else{let n=-1*parseInt(document.body.style.top,10);document.body.removeAttribute("data-md-scrolllock"),document.body.style.top="",n&&window.scrollTo(0,n)}})}Object.entries||(Object.entries=function(e){let t=[];for(let r of Object.keys(e))t.push([r,e[r]]);return t});Object.values||(Object.values=function(e){let t=[];for(let r of Object.keys(e))t.push(e[r]);return t});typeof Element!="undefined"&&(Element.prototype.scrollTo||(Element.prototype.scrollTo=function(e,t){typeof e=="object"?(this.scrollLeft=e.left,this.scrollTop=e.top):(this.scrollLeft=e,this.scrollTop=t)}),Element.prototype.replaceWith||(Element.prototype.replaceWith=function(...e){let t=this.parentNode;if(t){e.length===0&&t.removeChild(this);for(let r=e.length-1;r>=0;r--){let o=e[r];typeof o=="string"?o=document.createTextNode(o):o.parentNode&&o.parentNode.removeChild(o),r?t.insertBefore(this.previousSibling,o):t.replaceChild(o,this)}}}));function Ja(){return location.protocol==="file:"?gt(`${new URL("search/search_index.js",Gr.base)}`).pipe(m(()=>__index),Z(1)):De(new URL("search/search_index.json",Gr.base))}document.documentElement.classList.remove("no-js");document.documentElement.classList.add("js");var rt=zo(),Pt=Zo(),wt=tn(Pt),Jr=Xo(),_e=pn(),hr=At("(min-width: 960px)"),ui=At("(min-width: 1220px)"),di=rn(),Gr=he(),hi=document.forms.namedItem("search")?Ja():Ke,Xr=new x;Un({alert$:Xr});var Zr=new x;G("navigation.instant")&&Dn({location$:Pt,viewport$:_e,progress$:Zr}).subscribe(rt);var 
fi;((fi=Gr.version)==null?void 0:fi.provider)==="mike"&&Yn({document$:rt});L(Pt,wt).pipe(Qe(125)).subscribe(()=>{Ye("drawer",!1),Ye("search",!1)});Jr.pipe(v(({mode:e})=>e==="global")).subscribe(e=>{switch(e.type){case"p":case",":let t=ce("link[rel=prev]");typeof t!="undefined"&&st(t);break;case"n":case".":let r=ce("link[rel=next]");typeof r!="undefined"&&st(r);break;case"Enter":let o=Ie();o instanceof HTMLLabelElement&&o.click()}});ci({document$:rt});pi({document$:rt,tablet$:hr});li({document$:rt});mi({viewport$:_e,tablet$:hr});var tt=Pn(Oe("header"),{viewport$:_e}),$t=rt.pipe(m(()=>Oe("main")),w(e=>Fn(e,{viewport$:_e,header$:tt})),Z(1)),Xa=L(...ne("consent").map(e=>fn(e,{target$:wt})),...ne("dialog").map(e=>$n(e,{alert$:Xr})),...ne("header").map(e=>Rn(e,{viewport$:_e,header$:tt,main$:$t})),...ne("palette").map(e=>jn(e)),...ne("progress").map(e=>Wn(e,{progress$:Zr})),...ne("search").map(e=>Zn(e,{index$:hi,keyboard$:Jr})),...ne("source").map(e=>ni(e))),Za=H(()=>L(...ne("announce").map(e=>mn(e)),...ne("content").map(e=>Hn(e,{viewport$:_e,target$:wt,print$:di})),...ne("content").map(e=>G("search.highlight")?ei(e,{index$:hi,location$:Pt}):M),...ne("header-title").map(e=>In(e,{viewport$:_e,header$:tt})),...ne("sidebar").map(e=>e.getAttribute("data-md-type")==="navigation"?Dr(ui,()=>Br(e,{viewport$:_e,header$:tt,main$:$t})):Dr(hr,()=>Br(e,{viewport$:_e,header$:tt,main$:$t}))),...ne("tabs").map(e=>ii(e,{viewport$:_e,header$:tt})),...ne("toc").map(e=>ai(e,{viewport$:_e,header$:tt,main$:$t,target$:wt})),...ne("top").map(e=>si(e,{viewport$:_e,header$:tt,main$:$t,target$:wt})))),bi=rt.pipe(w(()=>Za),Re(Xa),Z(1));bi.subscribe();window.document$=rt;window.location$=Pt;window.target$=wt;window.keyboard$=Jr;window.viewport$=_e;window.tablet$=hr;window.screen$=ui;window.print$=di;window.alert$=Xr;window.progress$=Zr;window.component$=bi;})(); +//# sourceMappingURL=bundle.d7c377c4.min.js.map + diff --git a/0.9.7/assets/javascripts/bundle.d7c377c4.min.js.map 
b/0.9.7/assets/javascripts/bundle.d7c377c4.min.js.map new file mode 100644 index 0000000..a57d388 --- /dev/null +++ b/0.9.7/assets/javascripts/bundle.d7c377c4.min.js.map @@ -0,0 +1,7 @@ +{ + "version": 3, + "sources": ["node_modules/focus-visible/dist/focus-visible.js", "node_modules/clipboard/dist/clipboard.js", "node_modules/escape-html/index.js", "src/templates/assets/javascripts/bundle.ts", "node_modules/rxjs/node_modules/tslib/tslib.es6.js", "node_modules/rxjs/src/internal/util/isFunction.ts", "node_modules/rxjs/src/internal/util/createErrorClass.ts", "node_modules/rxjs/src/internal/util/UnsubscriptionError.ts", "node_modules/rxjs/src/internal/util/arrRemove.ts", "node_modules/rxjs/src/internal/Subscription.ts", "node_modules/rxjs/src/internal/config.ts", "node_modules/rxjs/src/internal/scheduler/timeoutProvider.ts", "node_modules/rxjs/src/internal/util/reportUnhandledError.ts", "node_modules/rxjs/src/internal/util/noop.ts", "node_modules/rxjs/src/internal/NotificationFactories.ts", "node_modules/rxjs/src/internal/util/errorContext.ts", "node_modules/rxjs/src/internal/Subscriber.ts", "node_modules/rxjs/src/internal/symbol/observable.ts", "node_modules/rxjs/src/internal/util/identity.ts", "node_modules/rxjs/src/internal/util/pipe.ts", "node_modules/rxjs/src/internal/Observable.ts", "node_modules/rxjs/src/internal/util/lift.ts", "node_modules/rxjs/src/internal/operators/OperatorSubscriber.ts", "node_modules/rxjs/src/internal/scheduler/animationFrameProvider.ts", "node_modules/rxjs/src/internal/util/ObjectUnsubscribedError.ts", "node_modules/rxjs/src/internal/Subject.ts", "node_modules/rxjs/src/internal/scheduler/dateTimestampProvider.ts", "node_modules/rxjs/src/internal/ReplaySubject.ts", "node_modules/rxjs/src/internal/scheduler/Action.ts", "node_modules/rxjs/src/internal/scheduler/intervalProvider.ts", "node_modules/rxjs/src/internal/scheduler/AsyncAction.ts", "node_modules/rxjs/src/internal/Scheduler.ts", 
"node_modules/rxjs/src/internal/scheduler/AsyncScheduler.ts", "node_modules/rxjs/src/internal/scheduler/async.ts", "node_modules/rxjs/src/internal/scheduler/AnimationFrameAction.ts", "node_modules/rxjs/src/internal/scheduler/AnimationFrameScheduler.ts", "node_modules/rxjs/src/internal/scheduler/animationFrame.ts", "node_modules/rxjs/src/internal/observable/empty.ts", "node_modules/rxjs/src/internal/util/isScheduler.ts", "node_modules/rxjs/src/internal/util/args.ts", "node_modules/rxjs/src/internal/util/isArrayLike.ts", "node_modules/rxjs/src/internal/util/isPromise.ts", "node_modules/rxjs/src/internal/util/isInteropObservable.ts", "node_modules/rxjs/src/internal/util/isAsyncIterable.ts", "node_modules/rxjs/src/internal/util/throwUnobservableError.ts", "node_modules/rxjs/src/internal/symbol/iterator.ts", "node_modules/rxjs/src/internal/util/isIterable.ts", "node_modules/rxjs/src/internal/util/isReadableStreamLike.ts", "node_modules/rxjs/src/internal/observable/innerFrom.ts", "node_modules/rxjs/src/internal/util/executeSchedule.ts", "node_modules/rxjs/src/internal/operators/observeOn.ts", "node_modules/rxjs/src/internal/operators/subscribeOn.ts", "node_modules/rxjs/src/internal/scheduled/scheduleObservable.ts", "node_modules/rxjs/src/internal/scheduled/schedulePromise.ts", "node_modules/rxjs/src/internal/scheduled/scheduleArray.ts", "node_modules/rxjs/src/internal/scheduled/scheduleIterable.ts", "node_modules/rxjs/src/internal/scheduled/scheduleAsyncIterable.ts", "node_modules/rxjs/src/internal/scheduled/scheduleReadableStreamLike.ts", "node_modules/rxjs/src/internal/scheduled/scheduled.ts", "node_modules/rxjs/src/internal/observable/from.ts", "node_modules/rxjs/src/internal/observable/of.ts", "node_modules/rxjs/src/internal/observable/throwError.ts", "node_modules/rxjs/src/internal/util/EmptyError.ts", "node_modules/rxjs/src/internal/util/isDate.ts", "node_modules/rxjs/src/internal/operators/map.ts", "node_modules/rxjs/src/internal/util/mapOneOrManyArgs.ts", 
"node_modules/rxjs/src/internal/util/argsArgArrayOrObject.ts", "node_modules/rxjs/src/internal/util/createObject.ts", "node_modules/rxjs/src/internal/observable/combineLatest.ts", "node_modules/rxjs/src/internal/operators/mergeInternals.ts", "node_modules/rxjs/src/internal/operators/mergeMap.ts", "node_modules/rxjs/src/internal/operators/mergeAll.ts", "node_modules/rxjs/src/internal/operators/concatAll.ts", "node_modules/rxjs/src/internal/observable/concat.ts", "node_modules/rxjs/src/internal/observable/defer.ts", "node_modules/rxjs/src/internal/observable/fromEvent.ts", "node_modules/rxjs/src/internal/observable/fromEventPattern.ts", "node_modules/rxjs/src/internal/observable/timer.ts", "node_modules/rxjs/src/internal/observable/merge.ts", "node_modules/rxjs/src/internal/observable/never.ts", "node_modules/rxjs/src/internal/util/argsOrArgArray.ts", "node_modules/rxjs/src/internal/operators/filter.ts", "node_modules/rxjs/src/internal/observable/zip.ts", "node_modules/rxjs/src/internal/operators/audit.ts", "node_modules/rxjs/src/internal/operators/auditTime.ts", "node_modules/rxjs/src/internal/operators/bufferCount.ts", "node_modules/rxjs/src/internal/operators/catchError.ts", "node_modules/rxjs/src/internal/operators/scanInternals.ts", "node_modules/rxjs/src/internal/operators/combineLatest.ts", "node_modules/rxjs/src/internal/operators/combineLatestWith.ts", "node_modules/rxjs/src/internal/operators/debounceTime.ts", "node_modules/rxjs/src/internal/operators/defaultIfEmpty.ts", "node_modules/rxjs/src/internal/operators/take.ts", "node_modules/rxjs/src/internal/operators/ignoreElements.ts", "node_modules/rxjs/src/internal/operators/mapTo.ts", "node_modules/rxjs/src/internal/operators/delayWhen.ts", "node_modules/rxjs/src/internal/operators/delay.ts", "node_modules/rxjs/src/internal/operators/distinctUntilChanged.ts", "node_modules/rxjs/src/internal/operators/distinctUntilKeyChanged.ts", "node_modules/rxjs/src/internal/operators/throwIfEmpty.ts", 
"node_modules/rxjs/src/internal/operators/endWith.ts", "node_modules/rxjs/src/internal/operators/finalize.ts", "node_modules/rxjs/src/internal/operators/first.ts", "node_modules/rxjs/src/internal/operators/takeLast.ts", "node_modules/rxjs/src/internal/operators/merge.ts", "node_modules/rxjs/src/internal/operators/mergeWith.ts", "node_modules/rxjs/src/internal/operators/repeat.ts", "node_modules/rxjs/src/internal/operators/sample.ts", "node_modules/rxjs/src/internal/operators/scan.ts", "node_modules/rxjs/src/internal/operators/share.ts", "node_modules/rxjs/src/internal/operators/shareReplay.ts", "node_modules/rxjs/src/internal/operators/skip.ts", "node_modules/rxjs/src/internal/operators/skipUntil.ts", "node_modules/rxjs/src/internal/operators/startWith.ts", "node_modules/rxjs/src/internal/operators/switchMap.ts", "node_modules/rxjs/src/internal/operators/takeUntil.ts", "node_modules/rxjs/src/internal/operators/takeWhile.ts", "node_modules/rxjs/src/internal/operators/tap.ts", "node_modules/rxjs/src/internal/operators/throttle.ts", "node_modules/rxjs/src/internal/operators/throttleTime.ts", "node_modules/rxjs/src/internal/operators/withLatestFrom.ts", "node_modules/rxjs/src/internal/operators/zip.ts", "node_modules/rxjs/src/internal/operators/zipWith.ts", "src/templates/assets/javascripts/browser/document/index.ts", "src/templates/assets/javascripts/browser/element/_/index.ts", "src/templates/assets/javascripts/browser/element/focus/index.ts", "src/templates/assets/javascripts/browser/element/hover/index.ts", "src/templates/assets/javascripts/browser/element/offset/_/index.ts", "src/templates/assets/javascripts/browser/element/offset/content/index.ts", "src/templates/assets/javascripts/utilities/h/index.ts", "src/templates/assets/javascripts/utilities/round/index.ts", "src/templates/assets/javascripts/browser/script/index.ts", "src/templates/assets/javascripts/browser/element/size/_/index.ts", "src/templates/assets/javascripts/browser/element/size/content/index.ts", 
"src/templates/assets/javascripts/browser/element/visibility/index.ts", "src/templates/assets/javascripts/browser/toggle/index.ts", "src/templates/assets/javascripts/browser/keyboard/index.ts", "src/templates/assets/javascripts/browser/location/_/index.ts", "src/templates/assets/javascripts/browser/location/hash/index.ts", "src/templates/assets/javascripts/browser/media/index.ts", "src/templates/assets/javascripts/browser/request/index.ts", "src/templates/assets/javascripts/browser/viewport/offset/index.ts", "src/templates/assets/javascripts/browser/viewport/size/index.ts", "src/templates/assets/javascripts/browser/viewport/_/index.ts", "src/templates/assets/javascripts/browser/viewport/at/index.ts", "src/templates/assets/javascripts/browser/worker/index.ts", "src/templates/assets/javascripts/_/index.ts", "src/templates/assets/javascripts/components/_/index.ts", "src/templates/assets/javascripts/components/announce/index.ts", "src/templates/assets/javascripts/components/consent/index.ts", "src/templates/assets/javascripts/templates/tooltip/index.tsx", "src/templates/assets/javascripts/templates/annotation/index.tsx", "src/templates/assets/javascripts/templates/clipboard/index.tsx", "src/templates/assets/javascripts/templates/search/index.tsx", "src/templates/assets/javascripts/templates/source/index.tsx", "src/templates/assets/javascripts/templates/tabbed/index.tsx", "src/templates/assets/javascripts/templates/table/index.tsx", "src/templates/assets/javascripts/templates/version/index.tsx", "src/templates/assets/javascripts/components/tooltip/index.ts", "src/templates/assets/javascripts/components/content/annotation/_/index.ts", "src/templates/assets/javascripts/components/content/annotation/list/index.ts", "src/templates/assets/javascripts/components/content/annotation/block/index.ts", "src/templates/assets/javascripts/components/content/code/_/index.ts", "src/templates/assets/javascripts/components/content/details/index.ts", 
"src/templates/assets/javascripts/components/content/mermaid/index.css", "src/templates/assets/javascripts/components/content/mermaid/index.ts", "src/templates/assets/javascripts/components/content/table/index.ts", "src/templates/assets/javascripts/components/content/tabs/index.ts", "src/templates/assets/javascripts/components/content/_/index.ts", "src/templates/assets/javascripts/components/dialog/index.ts", "src/templates/assets/javascripts/components/header/_/index.ts", "src/templates/assets/javascripts/components/header/title/index.ts", "src/templates/assets/javascripts/components/main/index.ts", "src/templates/assets/javascripts/components/palette/index.ts", "src/templates/assets/javascripts/components/progress/index.ts", "src/templates/assets/javascripts/integrations/clipboard/index.ts", "src/templates/assets/javascripts/integrations/sitemap/index.ts", "src/templates/assets/javascripts/integrations/instant/index.ts", "src/templates/assets/javascripts/integrations/search/highlighter/index.ts", "src/templates/assets/javascripts/integrations/search/worker/message/index.ts", "src/templates/assets/javascripts/integrations/search/worker/_/index.ts", "src/templates/assets/javascripts/integrations/version/index.ts", "src/templates/assets/javascripts/components/search/query/index.ts", "src/templates/assets/javascripts/components/search/result/index.ts", "src/templates/assets/javascripts/components/search/share/index.ts", "src/templates/assets/javascripts/components/search/suggest/index.ts", "src/templates/assets/javascripts/components/search/_/index.ts", "src/templates/assets/javascripts/components/search/highlight/index.ts", "src/templates/assets/javascripts/components/sidebar/index.ts", "src/templates/assets/javascripts/components/source/facts/github/index.ts", "src/templates/assets/javascripts/components/source/facts/gitlab/index.ts", "src/templates/assets/javascripts/components/source/facts/_/index.ts", 
"src/templates/assets/javascripts/components/source/_/index.ts", "src/templates/assets/javascripts/components/tabs/index.ts", "src/templates/assets/javascripts/components/toc/index.ts", "src/templates/assets/javascripts/components/top/index.ts", "src/templates/assets/javascripts/patches/ellipsis/index.ts", "src/templates/assets/javascripts/patches/indeterminate/index.ts", "src/templates/assets/javascripts/patches/scrollfix/index.ts", "src/templates/assets/javascripts/patches/scrolllock/index.ts", "src/templates/assets/javascripts/polyfills/index.ts"], + "sourcesContent": ["(function (global, factory) {\n typeof exports === 'object' && typeof module !== 'undefined' ? factory() :\n typeof define === 'function' && define.amd ? define(factory) :\n (factory());\n}(this, (function () { 'use strict';\n\n /**\n * Applies the :focus-visible polyfill at the given scope.\n * A scope in this case is either the top-level Document or a Shadow Root.\n *\n * @param {(Document|ShadowRoot)} scope\n * @see https://github.com/WICG/focus-visible\n */\n function applyFocusVisiblePolyfill(scope) {\n var hadKeyboardEvent = true;\n var hadFocusVisibleRecently = false;\n var hadFocusVisibleRecentlyTimeout = null;\n\n var inputTypesAllowlist = {\n text: true,\n search: true,\n url: true,\n tel: true,\n email: true,\n password: true,\n number: true,\n date: true,\n month: true,\n week: true,\n time: true,\n datetime: true,\n 'datetime-local': true\n };\n\n /**\n * Helper function for legacy browsers and iframes which sometimes focus\n * elements like document, body, and non-interactive SVG.\n * @param {Element} el\n */\n function isValidFocusTarget(el) {\n if (\n el &&\n el !== document &&\n el.nodeName !== 'HTML' &&\n el.nodeName !== 'BODY' &&\n 'classList' in el &&\n 'contains' in el.classList\n ) {\n return true;\n }\n return false;\n }\n\n /**\n * Computes whether the given element should automatically trigger the\n * `focus-visible` class being added, i.e. 
whether it should always match\n * `:focus-visible` when focused.\n * @param {Element} el\n * @return {boolean}\n */\n function focusTriggersKeyboardModality(el) {\n var type = el.type;\n var tagName = el.tagName;\n\n if (tagName === 'INPUT' && inputTypesAllowlist[type] && !el.readOnly) {\n return true;\n }\n\n if (tagName === 'TEXTAREA' && !el.readOnly) {\n return true;\n }\n\n if (el.isContentEditable) {\n return true;\n }\n\n return false;\n }\n\n /**\n * Add the `focus-visible` class to the given element if it was not added by\n * the author.\n * @param {Element} el\n */\n function addFocusVisibleClass(el) {\n if (el.classList.contains('focus-visible')) {\n return;\n }\n el.classList.add('focus-visible');\n el.setAttribute('data-focus-visible-added', '');\n }\n\n /**\n * Remove the `focus-visible` class from the given element if it was not\n * originally added by the author.\n * @param {Element} el\n */\n function removeFocusVisibleClass(el) {\n if (!el.hasAttribute('data-focus-visible-added')) {\n return;\n }\n el.classList.remove('focus-visible');\n el.removeAttribute('data-focus-visible-added');\n }\n\n /**\n * If the most recent user interaction was via the keyboard;\n * and the key press did not include a meta, alt/option, or control key;\n * then the modality is keyboard. 
Otherwise, the modality is not keyboard.\n * Apply `focus-visible` to any current active element and keep track\n * of our keyboard modality state with `hadKeyboardEvent`.\n * @param {KeyboardEvent} e\n */\n function onKeyDown(e) {\n if (e.metaKey || e.altKey || e.ctrlKey) {\n return;\n }\n\n if (isValidFocusTarget(scope.activeElement)) {\n addFocusVisibleClass(scope.activeElement);\n }\n\n hadKeyboardEvent = true;\n }\n\n /**\n * If at any point a user clicks with a pointing device, ensure that we change\n * the modality away from keyboard.\n * This avoids the situation where a user presses a key on an already focused\n * element, and then clicks on a different element, focusing it with a\n * pointing device, while we still think we're in keyboard modality.\n * @param {Event} e\n */\n function onPointerDown(e) {\n hadKeyboardEvent = false;\n }\n\n /**\n * On `focus`, add the `focus-visible` class to the target if:\n * - the target received focus as a result of keyboard navigation, or\n * - the event target is an element that will likely require interaction\n * via the keyboard (e.g. 
a text box)\n * @param {Event} e\n */\n function onFocus(e) {\n // Prevent IE from focusing the document or HTML element.\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (hadKeyboardEvent || focusTriggersKeyboardModality(e.target)) {\n addFocusVisibleClass(e.target);\n }\n }\n\n /**\n * On `blur`, remove the `focus-visible` class from the target.\n * @param {Event} e\n */\n function onBlur(e) {\n if (!isValidFocusTarget(e.target)) {\n return;\n }\n\n if (\n e.target.classList.contains('focus-visible') ||\n e.target.hasAttribute('data-focus-visible-added')\n ) {\n // To detect a tab/window switch, we look for a blur event followed\n // rapidly by a visibility change.\n // If we don't see a visibility change within 100ms, it's probably a\n // regular focus change.\n hadFocusVisibleRecently = true;\n window.clearTimeout(hadFocusVisibleRecentlyTimeout);\n hadFocusVisibleRecentlyTimeout = window.setTimeout(function() {\n hadFocusVisibleRecently = false;\n }, 100);\n removeFocusVisibleClass(e.target);\n }\n }\n\n /**\n * If the user changes tabs, keep track of whether or not the previously\n * focused element had .focus-visible.\n * @param {Event} e\n */\n function onVisibilityChange(e) {\n if (document.visibilityState === 'hidden') {\n // If the tab becomes active again, the browser will handle calling focus\n // on the element (Safari actually calls it twice).\n // If this tab change caused a blur on an element with focus-visible,\n // re-apply the class when the user switches back to the tab.\n if (hadFocusVisibleRecently) {\n hadKeyboardEvent = true;\n }\n addInitialPointerMoveListeners();\n }\n }\n\n /**\n * Add a group of listeners to detect usage of any pointing devices.\n * These listeners will be added when the polyfill first loads, and anytime\n * the window is blurred, so that they are active when the window regains\n * focus.\n */\n function addInitialPointerMoveListeners() {\n document.addEventListener('mousemove', onInitialPointerMove);\n 
document.addEventListener('mousedown', onInitialPointerMove);\n document.addEventListener('mouseup', onInitialPointerMove);\n document.addEventListener('pointermove', onInitialPointerMove);\n document.addEventListener('pointerdown', onInitialPointerMove);\n document.addEventListener('pointerup', onInitialPointerMove);\n document.addEventListener('touchmove', onInitialPointerMove);\n document.addEventListener('touchstart', onInitialPointerMove);\n document.addEventListener('touchend', onInitialPointerMove);\n }\n\n function removeInitialPointerMoveListeners() {\n document.removeEventListener('mousemove', onInitialPointerMove);\n document.removeEventListener('mousedown', onInitialPointerMove);\n document.removeEventListener('mouseup', onInitialPointerMove);\n document.removeEventListener('pointermove', onInitialPointerMove);\n document.removeEventListener('pointerdown', onInitialPointerMove);\n document.removeEventListener('pointerup', onInitialPointerMove);\n document.removeEventListener('touchmove', onInitialPointerMove);\n document.removeEventListener('touchstart', onInitialPointerMove);\n document.removeEventListener('touchend', onInitialPointerMove);\n }\n\n /**\n * When the polfyill first loads, assume the user is in keyboard modality.\n * If any event is received from a pointing device (e.g. mouse, pointer,\n * touch), turn off keyboard modality.\n * This accounts for situations where focus enters the page from the URL bar.\n * @param {Event} e\n */\n function onInitialPointerMove(e) {\n // Work around a Safari quirk that fires a mousemove on whenever the\n // window blurs, even if you're tabbing out of the page. \u00AF\\_(\u30C4)_/\u00AF\n if (e.target.nodeName && e.target.nodeName.toLowerCase() === 'html') {\n return;\n }\n\n hadKeyboardEvent = false;\n removeInitialPointerMoveListeners();\n }\n\n // For some kinds of state, we are interested in changes at the global scope\n // only. 
For example, global pointer input, global key presses and global\n // visibility change should affect the state at every scope:\n document.addEventListener('keydown', onKeyDown, true);\n document.addEventListener('mousedown', onPointerDown, true);\n document.addEventListener('pointerdown', onPointerDown, true);\n document.addEventListener('touchstart', onPointerDown, true);\n document.addEventListener('visibilitychange', onVisibilityChange, true);\n\n addInitialPointerMoveListeners();\n\n // For focus and blur, we specifically care about state changes in the local\n // scope. This is because focus / blur events that originate from within a\n // shadow root are not re-dispatched from the host element if it was already\n // the active element in its own scope:\n scope.addEventListener('focus', onFocus, true);\n scope.addEventListener('blur', onBlur, true);\n\n // We detect that a node is a ShadowRoot by ensuring that it is a\n // DocumentFragment and also has a host property. This check covers native\n // implementation and polyfill implementation transparently. If we only cared\n // about the native implementation, we could just check if the scope was\n // an instance of a ShadowRoot.\n if (scope.nodeType === Node.DOCUMENT_FRAGMENT_NODE && scope.host) {\n // Since a ShadowRoot is a special kind of DocumentFragment, it does not\n // have a root element to add a class to. 
So, we add this attribute to the\n // host element instead:\n scope.host.setAttribute('data-js-focus-visible', '');\n } else if (scope.nodeType === Node.DOCUMENT_NODE) {\n document.documentElement.classList.add('js-focus-visible');\n document.documentElement.setAttribute('data-js-focus-visible', '');\n }\n }\n\n // It is important to wrap all references to global window and document in\n // these checks to support server-side rendering use cases\n // @see https://github.com/WICG/focus-visible/issues/199\n if (typeof window !== 'undefined' && typeof document !== 'undefined') {\n // Make the polyfill helper globally available. This can be used as a signal\n // to interested libraries that wish to coordinate with the polyfill for e.g.,\n // applying the polyfill to a shadow root:\n window.applyFocusVisiblePolyfill = applyFocusVisiblePolyfill;\n\n // Notify interested libraries of the polyfill's presence, in case the\n // polyfill was loaded lazily:\n var event;\n\n try {\n event = new CustomEvent('focus-visible-polyfill-ready');\n } catch (error) {\n // IE11 does not support using CustomEvent as a constructor directly:\n event = document.createEvent('CustomEvent');\n event.initCustomEvent('focus-visible-polyfill-ready', false, false, {});\n }\n\n window.dispatchEvent(event);\n }\n\n if (typeof document !== 'undefined') {\n // Apply the polyfill to the global document, so that no JavaScript\n // coordination is required to use the polyfill in the top-level document:\n applyFocusVisiblePolyfill(document);\n }\n\n})));\n", "/*!\n * clipboard.js v2.0.11\n * https://clipboardjs.com/\n *\n * Licensed MIT \u00A9 Zeno Rocha\n */\n(function webpackUniversalModuleDefinition(root, factory) {\n\tif(typeof exports === 'object' && typeof module === 'object')\n\t\tmodule.exports = factory();\n\telse if(typeof define === 'function' && define.amd)\n\t\tdefine([], factory);\n\telse if(typeof exports === 'object')\n\t\texports[\"ClipboardJS\"] = 
factory();\n\telse\n\t\troot[\"ClipboardJS\"] = factory();\n})(this, function() {\nreturn /******/ (function() { // webpackBootstrap\n/******/ \tvar __webpack_modules__ = ({\n\n/***/ 686:\n/***/ (function(__unused_webpack_module, __webpack_exports__, __webpack_require__) {\n\n\"use strict\";\n\n// EXPORTS\n__webpack_require__.d(__webpack_exports__, {\n \"default\": function() { return /* binding */ clipboard; }\n});\n\n// EXTERNAL MODULE: ./node_modules/tiny-emitter/index.js\nvar tiny_emitter = __webpack_require__(279);\nvar tiny_emitter_default = /*#__PURE__*/__webpack_require__.n(tiny_emitter);\n// EXTERNAL MODULE: ./node_modules/good-listener/src/listen.js\nvar listen = __webpack_require__(370);\nvar listen_default = /*#__PURE__*/__webpack_require__.n(listen);\n// EXTERNAL MODULE: ./node_modules/select/src/select.js\nvar src_select = __webpack_require__(817);\nvar select_default = /*#__PURE__*/__webpack_require__.n(src_select);\n;// CONCATENATED MODULE: ./src/common/command.js\n/**\n * Executes a given operation type.\n * @param {String} type\n * @return {Boolean}\n */\nfunction command(type) {\n try {\n return document.execCommand(type);\n } catch (err) {\n return false;\n }\n}\n;// CONCATENATED MODULE: ./src/actions/cut.js\n\n\n/**\n * Cut action wrapper.\n * @param {String|HTMLElement} target\n * @return {String}\n */\n\nvar ClipboardActionCut = function ClipboardActionCut(target) {\n var selectedText = select_default()(target);\n command('cut');\n return selectedText;\n};\n\n/* harmony default export */ var actions_cut = (ClipboardActionCut);\n;// CONCATENATED MODULE: ./src/common/create-fake-element.js\n/**\n * Creates a fake textarea element with a value.\n * @param {String} value\n * @return {HTMLElement}\n */\nfunction createFakeElement(value) {\n var isRTL = document.documentElement.getAttribute('dir') === 'rtl';\n var fakeElement = document.createElement('textarea'); // Prevent zooming on iOS\n\n fakeElement.style.fontSize = '12pt'; // Reset box 
model\n\n fakeElement.style.border = '0';\n fakeElement.style.padding = '0';\n fakeElement.style.margin = '0'; // Move element out of screen horizontally\n\n fakeElement.style.position = 'absolute';\n fakeElement.style[isRTL ? 'right' : 'left'] = '-9999px'; // Move element to the same position vertically\n\n var yPosition = window.pageYOffset || document.documentElement.scrollTop;\n fakeElement.style.top = \"\".concat(yPosition, \"px\");\n fakeElement.setAttribute('readonly', '');\n fakeElement.value = value;\n return fakeElement;\n}\n;// CONCATENATED MODULE: ./src/actions/copy.js\n\n\n\n/**\n * Create fake copy action wrapper using a fake element.\n * @param {String} target\n * @param {Object} options\n * @return {String}\n */\n\nvar fakeCopyAction = function fakeCopyAction(value, options) {\n var fakeElement = createFakeElement(value);\n options.container.appendChild(fakeElement);\n var selectedText = select_default()(fakeElement);\n command('copy');\n fakeElement.remove();\n return selectedText;\n};\n/**\n * Copy action wrapper.\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @return {String}\n */\n\n\nvar ClipboardActionCopy = function ClipboardActionCopy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n var selectedText = '';\n\n if (typeof target === 'string') {\n selectedText = fakeCopyAction(target, options);\n } else if (target instanceof HTMLInputElement && !['text', 'search', 'url', 'tel', 'password'].includes(target === null || target === void 0 ? void 0 : target.type)) {\n // If input type doesn't support `setSelectionRange`. Simulate it. 
https://developer.mozilla.org/en-US/docs/Web/API/HTMLInputElement/setSelectionRange\n selectedText = fakeCopyAction(target.value, options);\n } else {\n selectedText = select_default()(target);\n command('copy');\n }\n\n return selectedText;\n};\n\n/* harmony default export */ var actions_copy = (ClipboardActionCopy);\n;// CONCATENATED MODULE: ./src/actions/default.js\nfunction _typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { _typeof = function _typeof(obj) { return typeof obj; }; } else { _typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? \"symbol\" : typeof obj; }; } return _typeof(obj); }\n\n\n\n/**\n * Inner function which performs selection from either `text` or `target`\n * properties and then executes copy or cut operations.\n * @param {Object} options\n */\n\nvar ClipboardActionDefault = function ClipboardActionDefault() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : {};\n // Defines base properties passed from constructor.\n var _options$action = options.action,\n action = _options$action === void 0 ? 'copy' : _options$action,\n container = options.container,\n target = options.target,\n text = options.text; // Sets the `action` to be performed which can be either 'copy' or 'cut'.\n\n if (action !== 'copy' && action !== 'cut') {\n throw new Error('Invalid \"action\" value, use either \"copy\" or \"cut\"');\n } // Sets the `target` property using an element that will be have its content copied.\n\n\n if (target !== undefined) {\n if (target && _typeof(target) === 'object' && target.nodeType === 1) {\n if (action === 'copy' && target.hasAttribute('disabled')) {\n throw new Error('Invalid \"target\" attribute. 
Please use \"readonly\" instead of \"disabled\" attribute');\n }\n\n if (action === 'cut' && (target.hasAttribute('readonly') || target.hasAttribute('disabled'))) {\n throw new Error('Invalid \"target\" attribute. You can\\'t cut text from elements with \"readonly\" or \"disabled\" attributes');\n }\n } else {\n throw new Error('Invalid \"target\" value, use a valid Element');\n }\n } // Define selection strategy based on `text` property.\n\n\n if (text) {\n return actions_copy(text, {\n container: container\n });\n } // Defines which selection strategy based on `target` property.\n\n\n if (target) {\n return action === 'cut' ? actions_cut(target) : actions_copy(target, {\n container: container\n });\n }\n};\n\n/* harmony default export */ var actions_default = (ClipboardActionDefault);\n;// CONCATENATED MODULE: ./src/clipboard.js\nfunction clipboard_typeof(obj) { \"@babel/helpers - typeof\"; if (typeof Symbol === \"function\" && typeof Symbol.iterator === \"symbol\") { clipboard_typeof = function _typeof(obj) { return typeof obj; }; } else { clipboard_typeof = function _typeof(obj) { return obj && typeof Symbol === \"function\" && obj.constructor === Symbol && obj !== Symbol.prototype ? 
\"symbol\" : typeof obj; }; } return clipboard_typeof(obj); }\n\nfunction _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError(\"Cannot call a class as a function\"); } }\n\nfunction _defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if (\"value\" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } }\n\nfunction _createClass(Constructor, protoProps, staticProps) { if (protoProps) _defineProperties(Constructor.prototype, protoProps); if (staticProps) _defineProperties(Constructor, staticProps); return Constructor; }\n\nfunction _inherits(subClass, superClass) { if (typeof superClass !== \"function\" && superClass !== null) { throw new TypeError(\"Super expression must either be null or a function\"); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, writable: true, configurable: true } }); if (superClass) _setPrototypeOf(subClass, superClass); }\n\nfunction _setPrototypeOf(o, p) { _setPrototypeOf = Object.setPrototypeOf || function _setPrototypeOf(o, p) { o.__proto__ = p; return o; }; return _setPrototypeOf(o, p); }\n\nfunction _createSuper(Derived) { var hasNativeReflectConstruct = _isNativeReflectConstruct(); return function _createSuperInternal() { var Super = _getPrototypeOf(Derived), result; if (hasNativeReflectConstruct) { var NewTarget = _getPrototypeOf(this).constructor; result = Reflect.construct(Super, arguments, NewTarget); } else { result = Super.apply(this, arguments); } return _possibleConstructorReturn(this, result); }; }\n\nfunction _possibleConstructorReturn(self, call) { if (call && (clipboard_typeof(call) === \"object\" || typeof call === \"function\")) { return call; } return _assertThisInitialized(self); }\n\nfunction _assertThisInitialized(self) { if 
(self === void 0) { throw new ReferenceError(\"this hasn't been initialised - super() hasn't been called\"); } return self; }\n\nfunction _isNativeReflectConstruct() { if (typeof Reflect === \"undefined\" || !Reflect.construct) return false; if (Reflect.construct.sham) return false; if (typeof Proxy === \"function\") return true; try { Date.prototype.toString.call(Reflect.construct(Date, [], function () {})); return true; } catch (e) { return false; } }\n\nfunction _getPrototypeOf(o) { _getPrototypeOf = Object.setPrototypeOf ? Object.getPrototypeOf : function _getPrototypeOf(o) { return o.__proto__ || Object.getPrototypeOf(o); }; return _getPrototypeOf(o); }\n\n\n\n\n\n\n/**\n * Helper function to retrieve attribute value.\n * @param {String} suffix\n * @param {Element} element\n */\n\nfunction getAttributeValue(suffix, element) {\n var attribute = \"data-clipboard-\".concat(suffix);\n\n if (!element.hasAttribute(attribute)) {\n return;\n }\n\n return element.getAttribute(attribute);\n}\n/**\n * Base class which takes one or more elements, adds event listeners to them,\n * and instantiates a new `ClipboardAction` on each click.\n */\n\n\nvar Clipboard = /*#__PURE__*/function (_Emitter) {\n _inherits(Clipboard, _Emitter);\n\n var _super = _createSuper(Clipboard);\n\n /**\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n * @param {Object} options\n */\n function Clipboard(trigger, options) {\n var _this;\n\n _classCallCheck(this, Clipboard);\n\n _this = _super.call(this);\n\n _this.resolveOptions(options);\n\n _this.listenClick(trigger);\n\n return _this;\n }\n /**\n * Defines if attributes would be resolved using internal setter functions\n * or custom functions that were passed in the constructor.\n * @param {Object} options\n */\n\n\n _createClass(Clipboard, [{\n key: \"resolveOptions\",\n value: function resolveOptions() {\n var options = arguments.length > 0 && arguments[0] !== undefined ? 
arguments[0] : {};\n this.action = typeof options.action === 'function' ? options.action : this.defaultAction;\n this.target = typeof options.target === 'function' ? options.target : this.defaultTarget;\n this.text = typeof options.text === 'function' ? options.text : this.defaultText;\n this.container = clipboard_typeof(options.container) === 'object' ? options.container : document.body;\n }\n /**\n * Adds a click event listener to the passed trigger.\n * @param {String|HTMLElement|HTMLCollection|NodeList} trigger\n */\n\n }, {\n key: \"listenClick\",\n value: function listenClick(trigger) {\n var _this2 = this;\n\n this.listener = listen_default()(trigger, 'click', function (e) {\n return _this2.onClick(e);\n });\n }\n /**\n * Defines a new `ClipboardAction` on each click event.\n * @param {Event} e\n */\n\n }, {\n key: \"onClick\",\n value: function onClick(e) {\n var trigger = e.delegateTarget || e.currentTarget;\n var action = this.action(trigger) || 'copy';\n var text = actions_default({\n action: action,\n container: this.container,\n target: this.target(trigger),\n text: this.text(trigger)\n }); // Fires an event based on the copy operation result.\n\n this.emit(text ? 
'success' : 'error', {\n action: action,\n text: text,\n trigger: trigger,\n clearSelection: function clearSelection() {\n if (trigger) {\n trigger.focus();\n }\n\n window.getSelection().removeAllRanges();\n }\n });\n }\n /**\n * Default `action` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultAction\",\n value: function defaultAction(trigger) {\n return getAttributeValue('action', trigger);\n }\n /**\n * Default `target` lookup function.\n * @param {Element} trigger\n */\n\n }, {\n key: \"defaultTarget\",\n value: function defaultTarget(trigger) {\n var selector = getAttributeValue('target', trigger);\n\n if (selector) {\n return document.querySelector(selector);\n }\n }\n /**\n * Allow fire programmatically a copy action\n * @param {String|HTMLElement} target\n * @param {Object} options\n * @returns Text copied.\n */\n\n }, {\n key: \"defaultText\",\n\n /**\n * Default `text` lookup function.\n * @param {Element} trigger\n */\n value: function defaultText(trigger) {\n return getAttributeValue('text', trigger);\n }\n /**\n * Destroy lifecycle.\n */\n\n }, {\n key: \"destroy\",\n value: function destroy() {\n this.listener.destroy();\n }\n }], [{\n key: \"copy\",\n value: function copy(target) {\n var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {\n container: document.body\n };\n return actions_copy(target, options);\n }\n /**\n * Allow fire programmatically a cut action\n * @param {String|HTMLElement} target\n * @returns Text cutted.\n */\n\n }, {\n key: \"cut\",\n value: function cut(target) {\n return actions_cut(target);\n }\n /**\n * Returns the support of the given action, or all actions if no action is\n * given.\n * @param {String} [action]\n */\n\n }, {\n key: \"isSupported\",\n value: function isSupported() {\n var action = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : ['copy', 'cut'];\n var actions = typeof action === 'string' ? 
[action] : action;\n var support = !!document.queryCommandSupported;\n actions.forEach(function (action) {\n support = support && !!document.queryCommandSupported(action);\n });\n return support;\n }\n }]);\n\n return Clipboard;\n}((tiny_emitter_default()));\n\n/* harmony default export */ var clipboard = (Clipboard);\n\n/***/ }),\n\n/***/ 828:\n/***/ (function(module) {\n\nvar DOCUMENT_NODE_TYPE = 9;\n\n/**\n * A polyfill for Element.matches()\n */\nif (typeof Element !== 'undefined' && !Element.prototype.matches) {\n var proto = Element.prototype;\n\n proto.matches = proto.matchesSelector ||\n proto.mozMatchesSelector ||\n proto.msMatchesSelector ||\n proto.oMatchesSelector ||\n proto.webkitMatchesSelector;\n}\n\n/**\n * Finds the closest parent that matches a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @return {Function}\n */\nfunction closest (element, selector) {\n while (element && element.nodeType !== DOCUMENT_NODE_TYPE) {\n if (typeof element.matches === 'function' &&\n element.matches(selector)) {\n return element;\n }\n element = element.parentNode;\n }\n}\n\nmodule.exports = closest;\n\n\n/***/ }),\n\n/***/ 438:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar closest = __webpack_require__(828);\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} useCapture\n * @return {Object}\n */\nfunction _delegate(element, selector, type, callback, useCapture) {\n var listenerFn = listener.apply(this, arguments);\n\n element.addEventListener(type, listenerFn, useCapture);\n\n return {\n destroy: function() {\n element.removeEventListener(type, listenerFn, useCapture);\n }\n }\n}\n\n/**\n * Delegates event to a selector.\n *\n * @param {Element|String|Array} [elements]\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @param {Boolean} 
useCapture\n * @return {Object}\n */\nfunction delegate(elements, selector, type, callback, useCapture) {\n // Handle the regular Element usage\n if (typeof elements.addEventListener === 'function') {\n return _delegate.apply(null, arguments);\n }\n\n // Handle Element-less usage, it defaults to global delegation\n if (typeof type === 'function') {\n // Use `document` as the first parameter, then apply arguments\n // This is a short way to .unshift `arguments` without running into deoptimizations\n return _delegate.bind(null, document).apply(null, arguments);\n }\n\n // Handle Selector-based usage\n if (typeof elements === 'string') {\n elements = document.querySelectorAll(elements);\n }\n\n // Handle Array-like based usage\n return Array.prototype.map.call(elements, function (element) {\n return _delegate(element, selector, type, callback, useCapture);\n });\n}\n\n/**\n * Finds closest match and invokes callback.\n *\n * @param {Element} element\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Function}\n */\nfunction listener(element, selector, type, callback) {\n return function(e) {\n e.delegateTarget = closest(e.target, selector);\n\n if (e.delegateTarget) {\n callback.call(element, e);\n }\n }\n}\n\nmodule.exports = delegate;\n\n\n/***/ }),\n\n/***/ 879:\n/***/ (function(__unused_webpack_module, exports) {\n\n/**\n * Check if argument is a HTML element.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.node = function(value) {\n return value !== undefined\n && value instanceof HTMLElement\n && value.nodeType === 1;\n};\n\n/**\n * Check if argument is a list of HTML elements.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.nodeList = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return value !== undefined\n && (type === '[object NodeList]' || type === '[object HTMLCollection]')\n && ('length' in value)\n && (value.length === 0 || 
exports.node(value[0]));\n};\n\n/**\n * Check if argument is a string.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.string = function(value) {\n return typeof value === 'string'\n || value instanceof String;\n};\n\n/**\n * Check if argument is a function.\n *\n * @param {Object} value\n * @return {Boolean}\n */\nexports.fn = function(value) {\n var type = Object.prototype.toString.call(value);\n\n return type === '[object Function]';\n};\n\n\n/***/ }),\n\n/***/ 370:\n/***/ (function(module, __unused_webpack_exports, __webpack_require__) {\n\nvar is = __webpack_require__(879);\nvar delegate = __webpack_require__(438);\n\n/**\n * Validates all params and calls the right\n * listener function based on its target type.\n *\n * @param {String|HTMLElement|HTMLCollection|NodeList} target\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listen(target, type, callback) {\n if (!target && !type && !callback) {\n throw new Error('Missing required arguments');\n }\n\n if (!is.string(type)) {\n throw new TypeError('Second argument must be a String');\n }\n\n if (!is.fn(callback)) {\n throw new TypeError('Third argument must be a Function');\n }\n\n if (is.node(target)) {\n return listenNode(target, type, callback);\n }\n else if (is.nodeList(target)) {\n return listenNodeList(target, type, callback);\n }\n else if (is.string(target)) {\n return listenSelector(target, type, callback);\n }\n else {\n throw new TypeError('First argument must be a String, HTMLElement, HTMLCollection, or NodeList');\n }\n}\n\n/**\n * Adds an event listener to a HTML element\n * and returns a remove listener function.\n *\n * @param {HTMLElement} node\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNode(node, type, callback) {\n node.addEventListener(type, callback);\n\n return {\n destroy: function() {\n node.removeEventListener(type, callback);\n }\n }\n}\n\n/**\n * Add an event listener 
to a list of HTML elements\n * and returns a remove listener function.\n *\n * @param {NodeList|HTMLCollection} nodeList\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenNodeList(nodeList, type, callback) {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.addEventListener(type, callback);\n });\n\n return {\n destroy: function() {\n Array.prototype.forEach.call(nodeList, function(node) {\n node.removeEventListener(type, callback);\n });\n }\n }\n}\n\n/**\n * Add an event listener to a selector\n * and returns a remove listener function.\n *\n * @param {String} selector\n * @param {String} type\n * @param {Function} callback\n * @return {Object}\n */\nfunction listenSelector(selector, type, callback) {\n return delegate(document.body, selector, type, callback);\n}\n\nmodule.exports = listen;\n\n\n/***/ }),\n\n/***/ 817:\n/***/ (function(module) {\n\nfunction select(element) {\n var selectedText;\n\n if (element.nodeName === 'SELECT') {\n element.focus();\n\n selectedText = element.value;\n }\n else if (element.nodeName === 'INPUT' || element.nodeName === 'TEXTAREA') {\n var isReadOnly = element.hasAttribute('readonly');\n\n if (!isReadOnly) {\n element.setAttribute('readonly', '');\n }\n\n element.select();\n element.setSelectionRange(0, element.value.length);\n\n if (!isReadOnly) {\n element.removeAttribute('readonly');\n }\n\n selectedText = element.value;\n }\n else {\n if (element.hasAttribute('contenteditable')) {\n element.focus();\n }\n\n var selection = window.getSelection();\n var range = document.createRange();\n\n range.selectNodeContents(element);\n selection.removeAllRanges();\n selection.addRange(range);\n\n selectedText = selection.toString();\n }\n\n return selectedText;\n}\n\nmodule.exports = select;\n\n\n/***/ }),\n\n/***/ 279:\n/***/ (function(module) {\n\nfunction E () {\n // Keep this empty so it's easier to inherit from\n // (via https://github.com/lipsmack from 
https://github.com/scottcorgan/tiny-emitter/issues/3)\n}\n\nE.prototype = {\n on: function (name, callback, ctx) {\n var e = this.e || (this.e = {});\n\n (e[name] || (e[name] = [])).push({\n fn: callback,\n ctx: ctx\n });\n\n return this;\n },\n\n once: function (name, callback, ctx) {\n var self = this;\n function listener () {\n self.off(name, listener);\n callback.apply(ctx, arguments);\n };\n\n listener._ = callback\n return this.on(name, listener, ctx);\n },\n\n emit: function (name) {\n var data = [].slice.call(arguments, 1);\n var evtArr = ((this.e || (this.e = {}))[name] || []).slice();\n var i = 0;\n var len = evtArr.length;\n\n for (i; i < len; i++) {\n evtArr[i].fn.apply(evtArr[i].ctx, data);\n }\n\n return this;\n },\n\n off: function (name, callback) {\n var e = this.e || (this.e = {});\n var evts = e[name];\n var liveEvents = [];\n\n if (evts && callback) {\n for (var i = 0, len = evts.length; i < len; i++) {\n if (evts[i].fn !== callback && evts[i].fn._ !== callback)\n liveEvents.push(evts[i]);\n }\n }\n\n // Remove event from queue to prevent memory leak\n // Suggested by https://github.com/lazd\n // Ref: https://github.com/scottcorgan/tiny-emitter/commit/c6ebfaa9bc973b33d110a84a307742b7cf94c953#commitcomment-5024910\n\n (liveEvents.length)\n ? 
e[name] = liveEvents\n : delete e[name];\n\n return this;\n }\n};\n\nmodule.exports = E;\nmodule.exports.TinyEmitter = E;\n\n\n/***/ })\n\n/******/ \t});\n/************************************************************************/\n/******/ \t// The module cache\n/******/ \tvar __webpack_module_cache__ = {};\n/******/ \t\n/******/ \t// The require function\n/******/ \tfunction __webpack_require__(moduleId) {\n/******/ \t\t// Check if module is in cache\n/******/ \t\tif(__webpack_module_cache__[moduleId]) {\n/******/ \t\t\treturn __webpack_module_cache__[moduleId].exports;\n/******/ \t\t}\n/******/ \t\t// Create a new module (and put it into the cache)\n/******/ \t\tvar module = __webpack_module_cache__[moduleId] = {\n/******/ \t\t\t// no module.id needed\n/******/ \t\t\t// no module.loaded needed\n/******/ \t\t\texports: {}\n/******/ \t\t};\n/******/ \t\n/******/ \t\t// Execute the module function\n/******/ \t\t__webpack_modules__[moduleId](module, module.exports, __webpack_require__);\n/******/ \t\n/******/ \t\t// Return the exports of the module\n/******/ \t\treturn module.exports;\n/******/ \t}\n/******/ \t\n/************************************************************************/\n/******/ \t/* webpack/runtime/compat get default export */\n/******/ \t!function() {\n/******/ \t\t// getDefaultExport function for compatibility with non-harmony modules\n/******/ \t\t__webpack_require__.n = function(module) {\n/******/ \t\t\tvar getter = module && module.__esModule ?\n/******/ \t\t\t\tfunction() { return module['default']; } :\n/******/ \t\t\t\tfunction() { return module; };\n/******/ \t\t\t__webpack_require__.d(getter, { a: getter });\n/******/ \t\t\treturn getter;\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/define property getters */\n/******/ \t!function() {\n/******/ \t\t// define getter functions for harmony exports\n/******/ \t\t__webpack_require__.d = function(exports, definition) {\n/******/ \t\t\tfor(var key in definition) 
{\n/******/ \t\t\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n/******/ \t\t\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n/******/ \t\t\t\t}\n/******/ \t\t\t}\n/******/ \t\t};\n/******/ \t}();\n/******/ \t\n/******/ \t/* webpack/runtime/hasOwnProperty shorthand */\n/******/ \t!function() {\n/******/ \t\t__webpack_require__.o = function(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); }\n/******/ \t}();\n/******/ \t\n/************************************************************************/\n/******/ \t// module exports must be returned from runtime so entry inlining is disabled\n/******/ \t// startup\n/******/ \t// Load entry module and return exports\n/******/ \treturn __webpack_require__(686);\n/******/ })()\n.default;\n});", "/*!\n * escape-html\n * Copyright(c) 2012-2013 TJ Holowaychuk\n * Copyright(c) 2015 Andreas Lubbe\n * Copyright(c) 2015 Tiancheng \"Timothy\" Gu\n * MIT Licensed\n */\n\n'use strict';\n\n/**\n * Module variables.\n * @private\n */\n\nvar matchHtmlRegExp = /[\"'&<>]/;\n\n/**\n * Module exports.\n * @public\n */\n\nmodule.exports = escapeHtml;\n\n/**\n * Escape special characters in the given string of html.\n *\n * @param {string} string The string to escape for inserting into HTML\n * @return {string}\n * @public\n */\n\nfunction escapeHtml(string) {\n var str = '' + string;\n var match = matchHtmlRegExp.exec(str);\n\n if (!match) {\n return str;\n }\n\n var escape;\n var html = '';\n var index = 0;\n var lastIndex = 0;\n\n for (index = match.index; index < str.length; index++) {\n switch (str.charCodeAt(index)) {\n case 34: // \"\n escape = '"';\n break;\n case 38: // &\n escape = '&';\n break;\n case 39: // '\n escape = ''';\n break;\n case 60: // <\n escape = '<';\n break;\n case 62: // >\n escape = '>';\n break;\n default:\n continue;\n }\n\n if (lastIndex !== index) {\n html += str.substring(lastIndex, index);\n }\n\n lastIndex = 
index + 1;\n html += escape;\n }\n\n return lastIndex !== index\n ? html + str.substring(lastIndex, index)\n : html;\n}\n", "/*\n * Copyright (c) 2016-2023 Martin Donath \n *\n * Permission is hereby granted, free of charge, to any person obtaining a copy\n * of this software and associated documentation files (the \"Software\"), to\n * deal in the Software without restriction, including without limitation the\n * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or\n * sell copies of the Software, and to permit persons to whom the Software is\n * furnished to do so, subject to the following conditions:\n *\n * The above copyright notice and this permission notice shall be included in\n * all copies or substantial portions of the Software.\n *\n * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE\n * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS\n * IN THE SOFTWARE.\n */\n\nimport \"focus-visible\"\n\nimport {\n EMPTY,\n NEVER,\n Observable,\n Subject,\n defer,\n delay,\n filter,\n map,\n merge,\n mergeWith,\n shareReplay,\n switchMap\n} from \"rxjs\"\n\nimport { configuration, feature } from \"./_\"\nimport {\n at,\n getActiveElement,\n getOptionalElement,\n requestJSON,\n setLocation,\n setToggle,\n watchDocument,\n watchKeyboard,\n watchLocation,\n watchLocationTarget,\n watchMedia,\n watchPrint,\n watchScript,\n watchViewport\n} from \"./browser\"\nimport {\n getComponentElement,\n getComponentElements,\n mountAnnounce,\n mountBackToTop,\n mountConsent,\n mountContent,\n mountDialog,\n mountHeader,\n mountHeaderTitle,\n mountPalette,\n mountProgress,\n mountSearch,\n 
mountSearchHiglight,\n mountSidebar,\n mountSource,\n mountTableOfContents,\n mountTabs,\n watchHeader,\n watchMain\n} from \"./components\"\nimport {\n SearchIndex,\n setupClipboardJS,\n setupInstantNavigation,\n setupVersionSelector\n} from \"./integrations\"\nimport {\n patchEllipsis,\n patchIndeterminate,\n patchScrollfix,\n patchScrolllock\n} from \"./patches\"\nimport \"./polyfills\"\n\n/* ----------------------------------------------------------------------------\n * Functions - @todo refactor\n * ------------------------------------------------------------------------- */\n\n/**\n * Fetch search index\n *\n * @returns Search index observable\n */\nfunction fetchSearchIndex(): Observable {\n if (location.protocol === \"file:\") {\n return watchScript(\n `${new URL(\"search/search_index.js\", config.base)}`\n )\n .pipe(\n // @ts-ignore - @todo fix typings\n map(() => __index),\n shareReplay(1)\n )\n } else {\n return requestJSON(\n new URL(\"search/search_index.json\", config.base)\n )\n }\n}\n\n/* ----------------------------------------------------------------------------\n * Application\n * ------------------------------------------------------------------------- */\n\n/* Yay, JavaScript is available */\ndocument.documentElement.classList.remove(\"no-js\")\ndocument.documentElement.classList.add(\"js\")\n\n/* Set up navigation observables and subjects */\nconst document$ = watchDocument()\nconst location$ = watchLocation()\nconst target$ = watchLocationTarget(location$)\nconst keyboard$ = watchKeyboard()\n\n/* Set up media observables */\nconst viewport$ = watchViewport()\nconst tablet$ = watchMedia(\"(min-width: 960px)\")\nconst screen$ = watchMedia(\"(min-width: 1220px)\")\nconst print$ = watchPrint()\n\n/* Retrieve search index, if search is enabled */\nconst config = configuration()\nconst index$ = document.forms.namedItem(\"search\")\n ? 
fetchSearchIndex()\n : NEVER\n\n/* Set up Clipboard.js integration */\nconst alert$ = new Subject()\nsetupClipboardJS({ alert$ })\n\n/* Set up progress indicator */\nconst progress$ = new Subject()\n\n/* Set up instant navigation, if enabled */\nif (feature(\"navigation.instant\"))\n setupInstantNavigation({ location$, viewport$, progress$ })\n .subscribe(document$)\n\n/* Set up version selector */\nif (config.version?.provider === \"mike\")\n setupVersionSelector({ document$ })\n\n/* Always close drawer and search on navigation */\nmerge(location$, target$)\n .pipe(\n delay(125)\n )\n .subscribe(() => {\n setToggle(\"drawer\", false)\n setToggle(\"search\", false)\n })\n\n/* Set up global keyboard handlers */\nkeyboard$\n .pipe(\n filter(({ mode }) => mode === \"global\")\n )\n .subscribe(key => {\n switch (key.type) {\n\n /* Go to previous page */\n case \"p\":\n case \",\":\n const prev = getOptionalElement(\"link[rel=prev]\")\n if (typeof prev !== \"undefined\")\n setLocation(prev)\n break\n\n /* Go to next page */\n case \"n\":\n case \".\":\n const next = getOptionalElement(\"link[rel=next]\")\n if (typeof next !== \"undefined\")\n setLocation(next)\n break\n\n /* Expand navigation, see https://bit.ly/3ZjG5io */\n case \"Enter\":\n const active = getActiveElement()\n if (active instanceof HTMLLabelElement)\n active.click()\n }\n })\n\n/* Set up patches */\npatchEllipsis({ document$ })\npatchIndeterminate({ document$, tablet$ })\npatchScrollfix({ document$ })\npatchScrolllock({ viewport$, tablet$ })\n\n/* Set up header and main area observable */\nconst header$ = watchHeader(getComponentElement(\"header\"), { viewport$ })\nconst main$ = document$\n .pipe(\n map(() => getComponentElement(\"main\")),\n switchMap(el => watchMain(el, { viewport$, header$ })),\n shareReplay(1)\n )\n\n/* Set up control component observables */\nconst control$ = merge(\n\n /* Consent */\n ...getComponentElements(\"consent\")\n .map(el => mountConsent(el, { target$ })),\n\n /* Dialog 
*/\n ...getComponentElements(\"dialog\")\n .map(el => mountDialog(el, { alert$ })),\n\n /* Header */\n ...getComponentElements(\"header\")\n .map(el => mountHeader(el, { viewport$, header$, main$ })),\n\n /* Color palette */\n ...getComponentElements(\"palette\")\n .map(el => mountPalette(el)),\n\n /* Progress bar */\n ...getComponentElements(\"progress\")\n .map(el => mountProgress(el, { progress$ })),\n\n /* Search */\n ...getComponentElements(\"search\")\n .map(el => mountSearch(el, { index$, keyboard$ })),\n\n /* Repository information */\n ...getComponentElements(\"source\")\n .map(el => mountSource(el))\n)\n\n/* Set up content component observables */\nconst content$ = defer(() => merge(\n\n /* Announcement bar */\n ...getComponentElements(\"announce\")\n .map(el => mountAnnounce(el)),\n\n /* Content */\n ...getComponentElements(\"content\")\n .map(el => mountContent(el, { viewport$, target$, print$ })),\n\n /* Search highlighting */\n ...getComponentElements(\"content\")\n .map(el => feature(\"search.highlight\")\n ? mountSearchHiglight(el, { index$, location$ })\n : EMPTY\n ),\n\n /* Header title */\n ...getComponentElements(\"header-title\")\n .map(el => mountHeaderTitle(el, { viewport$, header$ })),\n\n /* Sidebar */\n ...getComponentElements(\"sidebar\")\n .map(el => el.getAttribute(\"data-md-type\") === \"navigation\"\n ? 
at(screen$, () => mountSidebar(el, { viewport$, header$, main$ }))\n : at(tablet$, () => mountSidebar(el, { viewport$, header$, main$ }))\n ),\n\n /* Navigation tabs */\n ...getComponentElements(\"tabs\")\n .map(el => mountTabs(el, { viewport$, header$ })),\n\n /* Table of contents */\n ...getComponentElements(\"toc\")\n .map(el => mountTableOfContents(el, {\n viewport$, header$, main$, target$\n })),\n\n /* Back-to-top button */\n ...getComponentElements(\"top\")\n .map(el => mountBackToTop(el, { viewport$, header$, main$, target$ }))\n))\n\n/* Set up component observables */\nconst component$ = document$\n .pipe(\n switchMap(() => content$),\n mergeWith(control$),\n shareReplay(1)\n )\n\n/* Subscribe to all components */\ncomponent$.subscribe()\n\n/* ----------------------------------------------------------------------------\n * Exports\n * ------------------------------------------------------------------------- */\n\nwindow.document$ = document$ /* Document observable */\nwindow.location$ = location$ /* Location subject */\nwindow.target$ = target$ /* Location target observable */\nwindow.keyboard$ = keyboard$ /* Keyboard observable */\nwindow.viewport$ = viewport$ /* Viewport observable */\nwindow.tablet$ = tablet$ /* Media tablet observable */\nwindow.screen$ = screen$ /* Media screen observable */\nwindow.print$ = print$ /* Media print observable */\nwindow.alert$ = alert$ /* Alert subject */\nwindow.progress$ = progress$ /* Progress indicator subject */\nwindow.component$ = component$ /* Component observable */\n", "/*! *****************************************************************************\r\nCopyright (c) Microsoft Corporation.\r\n\r\nPermission to use, copy, modify, and/or distribute this software for any\r\npurpose with or without fee is hereby granted.\r\n\r\nTHE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH\r\nREGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY\r\nAND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,\r\nINDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM\r\nLOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR\r\nOTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR\r\nPERFORMANCE OF THIS SOFTWARE.\r\n***************************************************************************** */\r\n/* global Reflect, Promise */\r\n\r\nvar extendStatics = function(d, b) {\r\n extendStatics = Object.setPrototypeOf ||\r\n ({ __proto__: [] } instanceof Array && function (d, b) { d.__proto__ = b; }) ||\r\n function (d, b) { for (var p in b) if (Object.prototype.hasOwnProperty.call(b, p)) d[p] = b[p]; };\r\n return extendStatics(d, b);\r\n};\r\n\r\nexport function __extends(d, b) {\r\n if (typeof b !== \"function\" && b !== null)\r\n throw new TypeError(\"Class extends value \" + String(b) + \" is not a constructor or null\");\r\n extendStatics(d, b);\r\n function __() { this.constructor = d; }\r\n d.prototype = b === null ? 
Object.create(b) : (__.prototype = b.prototype, new __());\r\n}\r\n\r\nexport var __assign = function() {\r\n __assign = Object.assign || function __assign(t) {\r\n for (var s, i = 1, n = arguments.length; i < n; i++) {\r\n s = arguments[i];\r\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p)) t[p] = s[p];\r\n }\r\n return t;\r\n }\r\n return __assign.apply(this, arguments);\r\n}\r\n\r\nexport function __rest(s, e) {\r\n var t = {};\r\n for (var p in s) if (Object.prototype.hasOwnProperty.call(s, p) && e.indexOf(p) < 0)\r\n t[p] = s[p];\r\n if (s != null && typeof Object.getOwnPropertySymbols === \"function\")\r\n for (var i = 0, p = Object.getOwnPropertySymbols(s); i < p.length; i++) {\r\n if (e.indexOf(p[i]) < 0 && Object.prototype.propertyIsEnumerable.call(s, p[i]))\r\n t[p[i]] = s[p[i]];\r\n }\r\n return t;\r\n}\r\n\r\nexport function __decorate(decorators, target, key, desc) {\r\n var c = arguments.length, r = c < 3 ? target : desc === null ? desc = Object.getOwnPropertyDescriptor(target, key) : desc, d;\r\n if (typeof Reflect === \"object\" && typeof Reflect.decorate === \"function\") r = Reflect.decorate(decorators, target, key, desc);\r\n else for (var i = decorators.length - 1; i >= 0; i--) if (d = decorators[i]) r = (c < 3 ? d(r) : c > 3 ? d(target, key, r) : d(target, key)) || r;\r\n return c > 3 && r && Object.defineProperty(target, key, r), r;\r\n}\r\n\r\nexport function __param(paramIndex, decorator) {\r\n return function (target, key) { decorator(target, key, paramIndex); }\r\n}\r\n\r\nexport function __metadata(metadataKey, metadataValue) {\r\n if (typeof Reflect === \"object\" && typeof Reflect.metadata === \"function\") return Reflect.metadata(metadataKey, metadataValue);\r\n}\r\n\r\nexport function __awaiter(thisArg, _arguments, P, generator) {\r\n function adopt(value) { return value instanceof P ? 
value : new P(function (resolve) { resolve(value); }); }\r\n return new (P || (P = Promise))(function (resolve, reject) {\r\n function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }\r\n function rejected(value) { try { step(generator[\"throw\"](value)); } catch (e) { reject(e); } }\r\n function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }\r\n step((generator = generator.apply(thisArg, _arguments || [])).next());\r\n });\r\n}\r\n\r\nexport function __generator(thisArg, body) {\r\n var _ = { label: 0, sent: function() { if (t[0] & 1) throw t[1]; return t[1]; }, trys: [], ops: [] }, f, y, t, g;\r\n return g = { next: verb(0), \"throw\": verb(1), \"return\": verb(2) }, typeof Symbol === \"function\" && (g[Symbol.iterator] = function() { return this; }), g;\r\n function verb(n) { return function (v) { return step([n, v]); }; }\r\n function step(op) {\r\n if (f) throw new TypeError(\"Generator is already executing.\");\r\n while (_) try {\r\n if (f = 1, y && (t = op[0] & 2 ? y[\"return\"] : op[0] ? 
y[\"throw\"] || ((t = y[\"return\"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;\r\n if (y = 0, t) op = [op[0] & 2, t.value];\r\n switch (op[0]) {\r\n case 0: case 1: t = op; break;\r\n case 4: _.label++; return { value: op[1], done: false };\r\n case 5: _.label++; y = op[1]; op = [0]; continue;\r\n case 7: op = _.ops.pop(); _.trys.pop(); continue;\r\n default:\r\n if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) { _ = 0; continue; }\r\n if (op[0] === 3 && (!t || (op[1] > t[0] && op[1] < t[3]))) { _.label = op[1]; break; }\r\n if (op[0] === 6 && _.label < t[1]) { _.label = t[1]; t = op; break; }\r\n if (t && _.label < t[2]) { _.label = t[2]; _.ops.push(op); break; }\r\n if (t[2]) _.ops.pop();\r\n _.trys.pop(); continue;\r\n }\r\n op = body.call(thisArg, _);\r\n } catch (e) { op = [6, e]; y = 0; } finally { f = t = 0; }\r\n if (op[0] & 5) throw op[1]; return { value: op[0] ? op[1] : void 0, done: true };\r\n }\r\n}\r\n\r\nexport var __createBinding = Object.create ? (function(o, m, k, k2) {\r\n if (k2 === undefined) k2 = k;\r\n Object.defineProperty(o, k2, { enumerable: true, get: function() { return m[k]; } });\r\n}) : (function(o, m, k, k2) {\r\n if (k2 === undefined) k2 = k;\r\n o[k2] = m[k];\r\n});\r\n\r\nexport function __exportStar(m, o) {\r\n for (var p in m) if (p !== \"default\" && !Object.prototype.hasOwnProperty.call(o, p)) __createBinding(o, m, p);\r\n}\r\n\r\nexport function __values(o) {\r\n var s = typeof Symbol === \"function\" && Symbol.iterator, m = s && o[s], i = 0;\r\n if (m) return m.call(o);\r\n if (o && typeof o.length === \"number\") return {\r\n next: function () {\r\n if (o && i >= o.length) o = void 0;\r\n return { value: o && o[i++], done: !o };\r\n }\r\n };\r\n throw new TypeError(s ? 
\"Object is not iterable.\" : \"Symbol.iterator is not defined.\");\r\n}\r\n\r\nexport function __read(o, n) {\r\n var m = typeof Symbol === \"function\" && o[Symbol.iterator];\r\n if (!m) return o;\r\n var i = m.call(o), r, ar = [], e;\r\n try {\r\n while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);\r\n }\r\n catch (error) { e = { error: error }; }\r\n finally {\r\n try {\r\n if (r && !r.done && (m = i[\"return\"])) m.call(i);\r\n }\r\n finally { if (e) throw e.error; }\r\n }\r\n return ar;\r\n}\r\n\r\n/** @deprecated */\r\nexport function __spread() {\r\n for (var ar = [], i = 0; i < arguments.length; i++)\r\n ar = ar.concat(__read(arguments[i]));\r\n return ar;\r\n}\r\n\r\n/** @deprecated */\r\nexport function __spreadArrays() {\r\n for (var s = 0, i = 0, il = arguments.length; i < il; i++) s += arguments[i].length;\r\n for (var r = Array(s), k = 0, i = 0; i < il; i++)\r\n for (var a = arguments[i], j = 0, jl = a.length; j < jl; j++, k++)\r\n r[k] = a[j];\r\n return r;\r\n}\r\n\r\nexport function __spreadArray(to, from, pack) {\r\n if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {\r\n if (ar || !(i in from)) {\r\n if (!ar) ar = Array.prototype.slice.call(from, 0, i);\r\n ar[i] = from[i];\r\n }\r\n }\r\n return to.concat(ar || Array.prototype.slice.call(from));\r\n}\r\n\r\nexport function __await(v) {\r\n return this instanceof __await ? 
(this.v = v, this) : new __await(v);\r\n}\r\n\r\nexport function __asyncGenerator(thisArg, _arguments, generator) {\r\n if (!Symbol.asyncIterator) throw new TypeError(\"Symbol.asyncIterator is not defined.\");\r\n var g = generator.apply(thisArg, _arguments || []), i, q = [];\r\n return i = {}, verb(\"next\"), verb(\"throw\"), verb(\"return\"), i[Symbol.asyncIterator] = function () { return this; }, i;\r\n function verb(n) { if (g[n]) i[n] = function (v) { return new Promise(function (a, b) { q.push([n, v, a, b]) > 1 || resume(n, v); }); }; }\r\n function resume(n, v) { try { step(g[n](v)); } catch (e) { settle(q[0][3], e); } }\r\n function step(r) { r.value instanceof __await ? Promise.resolve(r.value.v).then(fulfill, reject) : settle(q[0][2], r); }\r\n function fulfill(value) { resume(\"next\", value); }\r\n function reject(value) { resume(\"throw\", value); }\r\n function settle(f, v) { if (f(v), q.shift(), q.length) resume(q[0][0], q[0][1]); }\r\n}\r\n\r\nexport function __asyncDelegator(o) {\r\n var i, p;\r\n return i = {}, verb(\"next\"), verb(\"throw\", function (e) { throw e; }), verb(\"return\"), i[Symbol.iterator] = function () { return this; }, i;\r\n function verb(n, f) { i[n] = o[n] ? function (v) { return (p = !p) ? { value: __await(o[n](v)), done: n === \"return\" } : f ? f(v) : v; } : f; }\r\n}\r\n\r\nexport function __asyncValues(o) {\r\n if (!Symbol.asyncIterator) throw new TypeError(\"Symbol.asyncIterator is not defined.\");\r\n var m = o[Symbol.asyncIterator], i;\r\n return m ? m.call(o) : (o = typeof __values === \"function\" ? 
__values(o) : o[Symbol.iterator](), i = {}, verb(\"next\"), verb(\"throw\"), verb(\"return\"), i[Symbol.asyncIterator] = function () { return this; }, i);\r\n function verb(n) { i[n] = o[n] && function (v) { return new Promise(function (resolve, reject) { v = o[n](v), settle(resolve, reject, v.done, v.value); }); }; }\r\n function settle(resolve, reject, d, v) { Promise.resolve(v).then(function(v) { resolve({ value: v, done: d }); }, reject); }\r\n}\r\n\r\nexport function __makeTemplateObject(cooked, raw) {\r\n if (Object.defineProperty) { Object.defineProperty(cooked, \"raw\", { value: raw }); } else { cooked.raw = raw; }\r\n return cooked;\r\n};\r\n\r\nvar __setModuleDefault = Object.create ? (function(o, v) {\r\n Object.defineProperty(o, \"default\", { enumerable: true, value: v });\r\n}) : function(o, v) {\r\n o[\"default\"] = v;\r\n};\r\n\r\nexport function __importStar(mod) {\r\n if (mod && mod.__esModule) return mod;\r\n var result = {};\r\n if (mod != null) for (var k in mod) if (k !== \"default\" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);\r\n __setModuleDefault(result, mod);\r\n return result;\r\n}\r\n\r\nexport function __importDefault(mod) {\r\n return (mod && mod.__esModule) ? mod : { default: mod };\r\n}\r\n\r\nexport function __classPrivateFieldGet(receiver, state, kind, f) {\r\n if (kind === \"a\" && !f) throw new TypeError(\"Private accessor was defined without a getter\");\r\n if (typeof state === \"function\" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError(\"Cannot read private member from an object whose class did not declare it\");\r\n return kind === \"m\" ? f : kind === \"a\" ? f.call(receiver) : f ? 
f.value : state.get(receiver);\r\n}\r\n\r\nexport function __classPrivateFieldSet(receiver, state, value, kind, f) {\r\n if (kind === \"m\") throw new TypeError(\"Private method is not writable\");\r\n if (kind === \"a\" && !f) throw new TypeError(\"Private accessor was defined without a setter\");\r\n if (typeof state === \"function\" ? receiver !== state || !f : !state.has(receiver)) throw new TypeError(\"Cannot write private member to an object whose class did not declare it\");\r\n return (kind === \"a\" ? f.call(receiver, value) : f ? f.value = value : state.set(receiver, value)), value;\r\n}\r\n", "/**\n * Returns true if the object is a function.\n * @param value The value to check\n */\nexport function isFunction(value: any): value is (...args: any[]) => any {\n return typeof value === 'function';\n}\n", "/**\n * Used to create Error subclasses until the community moves away from ES5.\n *\n * This is because compiling from TypeScript down to ES5 has issues with subclassing Errors\n * as well as other built-in types: https://github.com/Microsoft/TypeScript/issues/12123\n *\n * @param createImpl A factory function to create the actual constructor implementation. The returned\n * function should be a named function that calls `_super` internally.\n */\nexport function createErrorClass(createImpl: (_super: any) => any): T {\n const _super = (instance: any) => {\n Error.call(instance);\n instance.stack = new Error().stack;\n };\n\n const ctorFunc = createImpl(_super);\n ctorFunc.prototype = Object.create(Error.prototype);\n ctorFunc.prototype.constructor = ctorFunc;\n return ctorFunc;\n}\n", "import { createErrorClass } from './createErrorClass';\n\nexport interface UnsubscriptionError extends Error {\n readonly errors: any[];\n}\n\nexport interface UnsubscriptionErrorCtor {\n /**\n * @deprecated Internal implementation detail. 
Do not construct error instances.\n * Cannot be tagged as internal: https://github.com/ReactiveX/rxjs/issues/6269\n */\n new (errors: any[]): UnsubscriptionError;\n}\n\n/**\n * An error thrown when one or more errors have occurred during the\n * `unsubscribe` of a {@link Subscription}.\n */\nexport const UnsubscriptionError: UnsubscriptionErrorCtor = createErrorClass(\n (_super) =>\n function UnsubscriptionErrorImpl(this: any, errors: (Error | string)[]) {\n _super(this);\n this.message = errors\n ? `${errors.length} errors occurred during unsubscription:\n${errors.map((err, i) => `${i + 1}) ${err.toString()}`).join('\\n ')}`\n : '';\n this.name = 'UnsubscriptionError';\n this.errors = errors;\n }\n);\n", "/**\n * Removes an item from an array, mutating it.\n * @param arr The array to remove the item from\n * @param item The item to remove\n */\nexport function arrRemove(arr: T[] | undefined | null, item: T) {\n if (arr) {\n const index = arr.indexOf(item);\n 0 <= index && arr.splice(index, 1);\n }\n}\n", "import { isFunction } from './util/isFunction';\nimport { UnsubscriptionError } from './util/UnsubscriptionError';\nimport { SubscriptionLike, TeardownLogic, Unsubscribable } from './types';\nimport { arrRemove } from './util/arrRemove';\n\n/**\n * Represents a disposable resource, such as the execution of an Observable. 
A\n * Subscription has one important method, `unsubscribe`, that takes no argument\n * and just disposes the resource held by the subscription.\n *\n * Additionally, subscriptions may be grouped together through the `add()`\n * method, which will attach a child Subscription to the current Subscription.\n * When a Subscription is unsubscribed, all its children (and its grandchildren)\n * will be unsubscribed as well.\n *\n * @class Subscription\n */\nexport class Subscription implements SubscriptionLike {\n /** @nocollapse */\n public static EMPTY = (() => {\n const empty = new Subscription();\n empty.closed = true;\n return empty;\n })();\n\n /**\n * A flag to indicate whether this Subscription has already been unsubscribed.\n */\n public closed = false;\n\n private _parentage: Subscription[] | Subscription | null = null;\n\n /**\n * The list of registered finalizers to execute upon unsubscription. Adding and removing from this\n * list occurs in the {@link #add} and {@link #remove} methods.\n */\n private _finalizers: Exclude[] | null = null;\n\n /**\n * @param initialTeardown A function executed first as part of the finalization\n * process that is kicked off when {@link #unsubscribe} is called.\n */\n constructor(private initialTeardown?: () => void) {}\n\n /**\n * Disposes the resources held by the subscription. 
May, for instance, cancel\n * an ongoing Observable execution or cancel any other type of work that\n * started when the Subscription was created.\n * @return {void}\n */\n unsubscribe(): void {\n let errors: any[] | undefined;\n\n if (!this.closed) {\n this.closed = true;\n\n // Remove this from it's parents.\n const { _parentage } = this;\n if (_parentage) {\n this._parentage = null;\n if (Array.isArray(_parentage)) {\n for (const parent of _parentage) {\n parent.remove(this);\n }\n } else {\n _parentage.remove(this);\n }\n }\n\n const { initialTeardown: initialFinalizer } = this;\n if (isFunction(initialFinalizer)) {\n try {\n initialFinalizer();\n } catch (e) {\n errors = e instanceof UnsubscriptionError ? e.errors : [e];\n }\n }\n\n const { _finalizers } = this;\n if (_finalizers) {\n this._finalizers = null;\n for (const finalizer of _finalizers) {\n try {\n execFinalizer(finalizer);\n } catch (err) {\n errors = errors ?? [];\n if (err instanceof UnsubscriptionError) {\n errors = [...errors, ...err.errors];\n } else {\n errors.push(err);\n }\n }\n }\n }\n\n if (errors) {\n throw new UnsubscriptionError(errors);\n }\n }\n }\n\n /**\n * Adds a finalizer to this subscription, so that finalization will be unsubscribed/called\n * when this subscription is unsubscribed. If this subscription is already {@link #closed},\n * because it has already been unsubscribed, then whatever finalizer is passed to it\n * will automatically be executed (unless the finalizer itself is also a closed subscription).\n *\n * Closed Subscriptions cannot be added as finalizers to any subscription. Adding a closed\n * subscription to a any subscription will result in no operation. (A noop).\n *\n * Adding a subscription to itself, or adding `null` or `undefined` will not perform any\n * operation at all. (A noop).\n *\n * `Subscription` instances that are added to this instance will automatically remove themselves\n * if they are unsubscribed. 
Functions and {@link Unsubscribable} objects that you wish to remove\n * will need to be removed manually with {@link #remove}\n *\n * @param teardown The finalization logic to add to this subscription.\n */\n add(teardown: TeardownLogic): void {\n // Only add the finalizer if it's not undefined\n // and don't add a subscription to itself.\n if (teardown && teardown !== this) {\n if (this.closed) {\n // If this subscription is already closed,\n // execute whatever finalizer is handed to it automatically.\n execFinalizer(teardown);\n } else {\n if (teardown instanceof Subscription) {\n // We don't add closed subscriptions, and we don't add the same subscription\n // twice. Subscription unsubscribe is idempotent.\n if (teardown.closed || teardown._hasParent(this)) {\n return;\n }\n teardown._addParent(this);\n }\n (this._finalizers = this._finalizers ?? []).push(teardown);\n }\n }\n }\n\n /**\n * Checks to see if a this subscription already has a particular parent.\n * This will signal that this subscription has already been added to the parent in question.\n * @param parent the parent to check for\n */\n private _hasParent(parent: Subscription) {\n const { _parentage } = this;\n return _parentage === parent || (Array.isArray(_parentage) && _parentage.includes(parent));\n }\n\n /**\n * Adds a parent to this subscription so it can be removed from the parent if it\n * unsubscribes on it's own.\n *\n * NOTE: THIS ASSUMES THAT {@link _hasParent} HAS ALREADY BEEN CHECKED.\n * @param parent The parent subscription to add\n */\n private _addParent(parent: Subscription) {\n const { _parentage } = this;\n this._parentage = Array.isArray(_parentage) ? (_parentage.push(parent), _parentage) : _parentage ? 
[_parentage, parent] : parent;\n }\n\n /**\n * Called on a child when it is removed via {@link #remove}.\n * @param parent The parent to remove\n */\n private _removeParent(parent: Subscription) {\n const { _parentage } = this;\n if (_parentage === parent) {\n this._parentage = null;\n } else if (Array.isArray(_parentage)) {\n arrRemove(_parentage, parent);\n }\n }\n\n /**\n * Removes a finalizer from this subscription that was previously added with the {@link #add} method.\n *\n * Note that `Subscription` instances, when unsubscribed, will automatically remove themselves\n * from every other `Subscription` they have been added to. This means that using the `remove` method\n * is not a common thing and should be used thoughtfully.\n *\n * If you add the same finalizer instance of a function or an unsubscribable object to a `Subscription` instance\n * more than once, you will need to call `remove` the same number of times to remove all instances.\n *\n * All finalizer instances are removed to free up memory upon unsubscription.\n *\n * @param teardown The finalizer to remove from this subscription\n */\n remove(teardown: Exclude): void {\n const { _finalizers } = this;\n _finalizers && arrRemove(_finalizers, teardown);\n\n if (teardown instanceof Subscription) {\n teardown._removeParent(this);\n }\n }\n}\n\nexport const EMPTY_SUBSCRIPTION = Subscription.EMPTY;\n\nexport function isSubscription(value: any): value is Subscription {\n return (\n value instanceof Subscription ||\n (value && 'closed' in value && isFunction(value.remove) && isFunction(value.add) && isFunction(value.unsubscribe))\n );\n}\n\nfunction execFinalizer(finalizer: Unsubscribable | (() => void)) {\n if (isFunction(finalizer)) {\n finalizer();\n } else {\n finalizer.unsubscribe();\n }\n}\n", "import { Subscriber } from './Subscriber';\nimport { ObservableNotification } from './types';\n\n/**\n * The {@link GlobalConfig} object for RxJS. 
It is used to configure things\n * like how to react on unhandled errors.\n */\nexport const config: GlobalConfig = {\n onUnhandledError: null,\n onStoppedNotification: null,\n Promise: undefined,\n useDeprecatedSynchronousErrorHandling: false,\n useDeprecatedNextContext: false,\n};\n\n/**\n * The global configuration object for RxJS, used to configure things\n * like how to react on unhandled errors. Accessible via {@link config}\n * object.\n */\nexport interface GlobalConfig {\n /**\n * A registration point for unhandled errors from RxJS. These are errors that\n * cannot were not handled by consuming code in the usual subscription path. For\n * example, if you have this configured, and you subscribe to an observable without\n * providing an error handler, errors from that subscription will end up here. This\n * will _always_ be called asynchronously on another job in the runtime. This is because\n * we do not want errors thrown in this user-configured handler to interfere with the\n * behavior of the library.\n */\n onUnhandledError: ((err: any) => void) | null;\n\n /**\n * A registration point for notifications that cannot be sent to subscribers because they\n * have completed, errored or have been explicitly unsubscribed. By default, next, complete\n * and error notifications sent to stopped subscribers are noops. However, sometimes callers\n * might want a different behavior. For example, with sources that attempt to report errors\n * to stopped subscribers, a caller can configure RxJS to throw an unhandled error instead.\n * This will _always_ be called asynchronously on another job in the runtime. 
This is because\n * we do not want errors thrown in this user-configured handler to interfere with the\n * behavior of the library.\n */\n onStoppedNotification: ((notification: ObservableNotification, subscriber: Subscriber) => void) | null;\n\n /**\n * The promise constructor used by default for {@link Observable#toPromise toPromise} and {@link Observable#forEach forEach}\n * methods.\n *\n * @deprecated As of version 8, RxJS will no longer support this sort of injection of a\n * Promise constructor. If you need a Promise implementation other than native promises,\n * please polyfill/patch Promise as you see appropriate. Will be removed in v8.\n */\n Promise?: PromiseConstructorLike;\n\n /**\n * If true, turns on synchronous error rethrowing, which is a deprecated behavior\n * in v6 and higher. This behavior enables bad patterns like wrapping a subscribe\n * call in a try/catch block. It also enables producer interference, a nasty bug\n * where a multicast can be broken for all observers by a downstream consumer with\n * an unhandled error. DO NOT USE THIS FLAG UNLESS IT'S NEEDED TO BUY TIME\n * FOR MIGRATION REASONS.\n *\n * @deprecated As of version 8, RxJS will no longer support synchronous throwing\n * of unhandled errors. All errors will be thrown on a separate call stack to prevent bad\n * behaviors described above. 
Will be removed in v8.\n */\n useDeprecatedSynchronousErrorHandling: boolean;\n\n /**\n * If true, enables an as-of-yet undocumented feature from v5: The ability to access\n * `unsubscribe()` via `this` context in `next` functions created in observers passed\n * to `subscribe`.\n *\n * This is being removed because the performance was severely problematic, and it could also cause\n * issues when types other than POJOs are passed to subscribe as subscribers, as they will likely have\n * their `this` context overwritten.\n *\n * @deprecated As of version 8, RxJS will no longer support altering the\n * context of next functions provided as part of an observer to Subscribe. Instead,\n * you will have access to a subscription or a signal or token that will allow you to do things like\n * unsubscribe and test closed status. Will be removed in v8.\n */\n useDeprecatedNextContext: boolean;\n}\n", "import type { TimerHandle } from './timerHandle';\ntype SetTimeoutFunction = (handler: () => void, timeout?: number, ...args: any[]) => TimerHandle;\ntype ClearTimeoutFunction = (handle: TimerHandle) => void;\n\ninterface TimeoutProvider {\n setTimeout: SetTimeoutFunction;\n clearTimeout: ClearTimeoutFunction;\n delegate:\n | {\n setTimeout: SetTimeoutFunction;\n clearTimeout: ClearTimeoutFunction;\n }\n | undefined;\n}\n\nexport const timeoutProvider: TimeoutProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n setTimeout(handler: () => void, timeout?: number, ...args) {\n const { delegate } = timeoutProvider;\n if (delegate?.setTimeout) {\n return delegate.setTimeout(handler, timeout, ...args);\n }\n return setTimeout(handler, timeout, ...args);\n },\n clearTimeout(handle) {\n const { delegate } = timeoutProvider;\n return (delegate?.clearTimeout || clearTimeout)(handle as any);\n },\n delegate: undefined,\n};\n", "import { config } from '../config';\nimport { 
timeoutProvider } from '../scheduler/timeoutProvider';\n\n/**\n * Handles an error on another job either with the user-configured {@link onUnhandledError},\n * or by throwing it on that new job so it can be picked up by `window.onerror`, `process.on('error')`, etc.\n *\n * This should be called whenever there is an error that is out-of-band with the subscription\n * or when an error hits a terminal boundary of the subscription and no error handler was provided.\n *\n * @param err the error to report\n */\nexport function reportUnhandledError(err: any) {\n timeoutProvider.setTimeout(() => {\n const { onUnhandledError } = config;\n if (onUnhandledError) {\n // Execute the user-configured error handler.\n onUnhandledError(err);\n } else {\n // Throw so it is picked up by the runtime's uncaught error mechanism.\n throw err;\n }\n });\n}\n", "/* tslint:disable:no-empty */\nexport function noop() { }\n", "import { CompleteNotification, NextNotification, ErrorNotification } from './types';\n\n/**\n * A completion object optimized for memory use and created to be the\n * same \"shape\" as other notifications in v8.\n * @internal\n */\nexport const COMPLETE_NOTIFICATION = (() => createNotification('C', undefined, undefined) as CompleteNotification)();\n\n/**\n * Internal use only. Creates an optimized error notification that is the same \"shape\"\n * as other notifications.\n * @internal\n */\nexport function errorNotification(error: any): ErrorNotification {\n return createNotification('E', undefined, error) as any;\n}\n\n/**\n * Internal use only. 
Creates an optimized next notification that is the same \"shape\"\n * as other notifications.\n * @internal\n */\nexport function nextNotification(value: T) {\n return createNotification('N', value, undefined) as NextNotification;\n}\n\n/**\n * Ensures that all notifications created internally have the same \"shape\" in v8.\n *\n * TODO: This is only exported to support a crazy legacy test in `groupBy`.\n * @internal\n */\nexport function createNotification(kind: 'N' | 'E' | 'C', value: any, error: any) {\n return {\n kind,\n value,\n error,\n };\n}\n", "import { config } from '../config';\n\nlet context: { errorThrown: boolean; error: any } | null = null;\n\n/**\n * Handles dealing with errors for super-gross mode. Creates a context, in which\n * any synchronously thrown errors will be passed to {@link captureError}. Which\n * will record the error such that it will be rethrown after the call back is complete.\n * TODO: Remove in v8\n * @param cb An immediately executed function.\n */\nexport function errorContext(cb: () => void) {\n if (config.useDeprecatedSynchronousErrorHandling) {\n const isRoot = !context;\n if (isRoot) {\n context = { errorThrown: false, error: null };\n }\n cb();\n if (isRoot) {\n const { errorThrown, error } = context!;\n context = null;\n if (errorThrown) {\n throw error;\n }\n }\n } else {\n // This is the general non-deprecated path for everyone that\n // isn't crazy enough to use super-gross mode (useDeprecatedSynchronousErrorHandling)\n cb();\n }\n}\n\n/**\n * Captures errors only in super-gross mode.\n * @param err the error to capture\n */\nexport function captureError(err: any) {\n if (config.useDeprecatedSynchronousErrorHandling && context) {\n context.errorThrown = true;\n context.error = err;\n }\n}\n", "import { isFunction } from './util/isFunction';\nimport { Observer, ObservableNotification } from './types';\nimport { isSubscription, Subscription } from './Subscription';\nimport { config } from './config';\nimport { 
reportUnhandledError } from './util/reportUnhandledError';\nimport { noop } from './util/noop';\nimport { nextNotification, errorNotification, COMPLETE_NOTIFICATION } from './NotificationFactories';\nimport { timeoutProvider } from './scheduler/timeoutProvider';\nimport { captureError } from './util/errorContext';\n\n/**\n * Implements the {@link Observer} interface and extends the\n * {@link Subscription} class. While the {@link Observer} is the public API for\n * consuming the values of an {@link Observable}, all Observers get converted to\n * a Subscriber, in order to provide Subscription-like capabilities such as\n * `unsubscribe`. Subscriber is a common type in RxJS, and crucial for\n * implementing operators, but it is rarely used as a public API.\n *\n * @class Subscriber\n */\nexport class Subscriber extends Subscription implements Observer {\n /**\n * A static factory for a Subscriber, given a (potentially partial) definition\n * of an Observer.\n * @param next The `next` callback of an Observer.\n * @param error The `error` callback of an\n * Observer.\n * @param complete The `complete` callback of an\n * Observer.\n * @return A Subscriber wrapping the (partially defined)\n * Observer represented by the given arguments.\n * @nocollapse\n * @deprecated Do not use. Will be removed in v8. There is no replacement for this\n * method, and there is no reason to be creating instances of `Subscriber` directly.\n * If you have a specific use case, please file an issue.\n */\n static create(next?: (x?: T) => void, error?: (e?: any) => void, complete?: () => void): Subscriber {\n return new SafeSubscriber(next, error, complete);\n }\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n protected isStopped: boolean = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. 
*/\n protected destination: Subscriber | Observer; // this `any` is the escape hatch to erase extra type param (e.g. R)\n\n /**\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n * There is no reason to directly create an instance of Subscriber. This type is exported for typings reasons.\n */\n constructor(destination?: Subscriber | Observer) {\n super();\n if (destination) {\n this.destination = destination;\n // Automatically chain subscriptions together here.\n // if destination is a Subscription, then it is a Subscriber.\n if (isSubscription(destination)) {\n destination.add(this);\n }\n } else {\n this.destination = EMPTY_OBSERVER;\n }\n }\n\n /**\n * The {@link Observer} callback to receive notifications of type `next` from\n * the Observable, with a value. The Observable may call this method 0 or more\n * times.\n * @param {T} [value] The `next` value.\n * @return {void}\n */\n next(value?: T): void {\n if (this.isStopped) {\n handleStoppedNotification(nextNotification(value), this);\n } else {\n this._next(value!);\n }\n }\n\n /**\n * The {@link Observer} callback to receive notifications of type `error` from\n * the Observable, with an attached `Error`. Notifies the Observer that\n * the Observable has experienced an error condition.\n * @param {any} [err] The `error` exception.\n * @return {void}\n */\n error(err?: any): void {\n if (this.isStopped) {\n handleStoppedNotification(errorNotification(err), this);\n } else {\n this.isStopped = true;\n this._error(err);\n }\n }\n\n /**\n * The {@link Observer} callback to receive a valueless notification of type\n * `complete` from the Observable. 
Notifies the Observer that the Observable\n * has finished sending push-based notifications.\n * @return {void}\n */\n complete(): void {\n if (this.isStopped) {\n handleStoppedNotification(COMPLETE_NOTIFICATION, this);\n } else {\n this.isStopped = true;\n this._complete();\n }\n }\n\n unsubscribe(): void {\n if (!this.closed) {\n this.isStopped = true;\n super.unsubscribe();\n this.destination = null!;\n }\n }\n\n protected _next(value: T): void {\n this.destination.next(value);\n }\n\n protected _error(err: any): void {\n try {\n this.destination.error(err);\n } finally {\n this.unsubscribe();\n }\n }\n\n protected _complete(): void {\n try {\n this.destination.complete();\n } finally {\n this.unsubscribe();\n }\n }\n}\n\n/**\n * This bind is captured here because we want to be able to have\n * compatibility with monoid libraries that tend to use a method named\n * `bind`. In particular, a library called Monio requires this.\n */\nconst _bind = Function.prototype.bind;\n\nfunction bind any>(fn: Fn, thisArg: any): Fn {\n return _bind.call(fn, thisArg);\n}\n\n/**\n * Internal optimization only, DO NOT EXPOSE.\n * @internal\n */\nclass ConsumerObserver implements Observer {\n constructor(private partialObserver: Partial>) {}\n\n next(value: T): void {\n const { partialObserver } = this;\n if (partialObserver.next) {\n try {\n partialObserver.next(value);\n } catch (error) {\n handleUnhandledError(error);\n }\n }\n }\n\n error(err: any): void {\n const { partialObserver } = this;\n if (partialObserver.error) {\n try {\n partialObserver.error(err);\n } catch (error) {\n handleUnhandledError(error);\n }\n } else {\n handleUnhandledError(err);\n }\n }\n\n complete(): void {\n const { partialObserver } = this;\n if (partialObserver.complete) {\n try {\n partialObserver.complete();\n } catch (error) {\n handleUnhandledError(error);\n }\n }\n }\n}\n\nexport class SafeSubscriber extends Subscriber {\n constructor(\n observerOrNext?: Partial> | ((value: T) => void) | 
null,\n error?: ((e?: any) => void) | null,\n complete?: (() => void) | null\n ) {\n super();\n\n let partialObserver: Partial>;\n if (isFunction(observerOrNext) || !observerOrNext) {\n // The first argument is a function, not an observer. The next\n // two arguments *could* be observers, or they could be empty.\n partialObserver = {\n next: (observerOrNext ?? undefined) as (((value: T) => void) | undefined),\n error: error ?? undefined,\n complete: complete ?? undefined,\n };\n } else {\n // The first argument is a partial observer.\n let context: any;\n if (this && config.useDeprecatedNextContext) {\n // This is a deprecated path that made `this.unsubscribe()` available in\n // next handler functions passed to subscribe. This only exists behind a flag\n // now, as it is *very* slow.\n context = Object.create(observerOrNext);\n context.unsubscribe = () => this.unsubscribe();\n partialObserver = {\n next: observerOrNext.next && bind(observerOrNext.next, context),\n error: observerOrNext.error && bind(observerOrNext.error, context),\n complete: observerOrNext.complete && bind(observerOrNext.complete, context),\n };\n } else {\n // The \"normal\" path. 
Just use the partial observer directly.\n partialObserver = observerOrNext;\n }\n }\n\n // Wrap the partial observer to ensure it's a full observer, and\n // make sure proper error handling is accounted for.\n this.destination = new ConsumerObserver(partialObserver);\n }\n}\n\nfunction handleUnhandledError(error: any) {\n if (config.useDeprecatedSynchronousErrorHandling) {\n captureError(error);\n } else {\n // Ideal path, we report this as an unhandled error,\n // which is thrown on a new call stack.\n reportUnhandledError(error);\n }\n}\n\n/**\n * An error handler used when no error handler was supplied\n * to the SafeSubscriber -- meaning no error handler was supplied\n * do the `subscribe` call on our observable.\n * @param err The error to handle\n */\nfunction defaultErrorHandler(err: any) {\n throw err;\n}\n\n/**\n * A handler for notifications that cannot be sent to a stopped subscriber.\n * @param notification The notification being sent\n * @param subscriber The stopped subscriber\n */\nfunction handleStoppedNotification(notification: ObservableNotification, subscriber: Subscriber) {\n const { onStoppedNotification } = config;\n onStoppedNotification && timeoutProvider.setTimeout(() => onStoppedNotification(notification, subscriber));\n}\n\n/**\n * The observer used as a stub for subscriptions where the user did not\n * pass any arguments to `subscribe`. Comes with the default error handling\n * behavior.\n */\nexport const EMPTY_OBSERVER: Readonly> & { closed: true } = {\n closed: true,\n next: noop,\n error: defaultErrorHandler,\n complete: noop,\n};\n", "/**\n * Symbol.observable or a string \"@@observable\". 
Used for interop\n *\n * @deprecated We will no longer be exporting this symbol in upcoming versions of RxJS.\n * Instead polyfill and use Symbol.observable directly *or* use https://www.npmjs.com/package/symbol-observable\n */\nexport const observable: string | symbol = (() => (typeof Symbol === 'function' && Symbol.observable) || '@@observable')();\n", "/**\n * This function takes one parameter and just returns it. Simply put,\n * this is like `(x: T): T => x`.\n *\n * ## Examples\n *\n * This is useful in some cases when using things like `mergeMap`\n *\n * ```ts\n * import { interval, take, map, range, mergeMap, identity } from 'rxjs';\n *\n * const source$ = interval(1000).pipe(take(5));\n *\n * const result$ = source$.pipe(\n * map(i => range(i)),\n * mergeMap(identity) // same as mergeMap(x => x)\n * );\n *\n * result$.subscribe({\n * next: console.log\n * });\n * ```\n *\n * Or when you want to selectively apply an operator\n *\n * ```ts\n * import { interval, take, identity } from 'rxjs';\n *\n * const shouldLimit = () => Math.random() < 0.5;\n *\n * const source$ = interval(1000);\n *\n * const result$ = source$.pipe(shouldLimit() ? 
take(5) : identity);\n *\n * result$.subscribe({\n * next: console.log\n * });\n * ```\n *\n * @param x Any value that is returned by this function\n * @returns The value passed as the first parameter to this function\n */\nexport function identity(x: T): T {\n return x;\n}\n", "import { identity } from './identity';\nimport { UnaryFunction } from '../types';\n\nexport function pipe(): typeof identity;\nexport function pipe(fn1: UnaryFunction): UnaryFunction;\nexport function pipe(fn1: UnaryFunction, fn2: UnaryFunction): UnaryFunction;\nexport function pipe(fn1: UnaryFunction, fn2: UnaryFunction, fn3: UnaryFunction): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction,\n fn9: UnaryFunction\n): UnaryFunction;\nexport function pipe(\n fn1: UnaryFunction,\n fn2: UnaryFunction,\n fn3: UnaryFunction,\n fn4: UnaryFunction,\n fn5: UnaryFunction,\n fn6: UnaryFunction,\n fn7: UnaryFunction,\n fn8: UnaryFunction,\n fn9: UnaryFunction,\n ...fns: UnaryFunction[]\n): 
UnaryFunction;\n\n/**\n * pipe() can be called on one or more functions, each of which can take one argument (\"UnaryFunction\")\n * and uses it to return a value.\n * It returns a function that takes one argument, passes it to the first UnaryFunction, and then\n * passes the result to the next one, passes that result to the next one, and so on. \n */\nexport function pipe(...fns: Array>): UnaryFunction {\n return pipeFromArray(fns);\n}\n\n/** @internal */\nexport function pipeFromArray(fns: Array>): UnaryFunction {\n if (fns.length === 0) {\n return identity as UnaryFunction;\n }\n\n if (fns.length === 1) {\n return fns[0];\n }\n\n return function piped(input: T): R {\n return fns.reduce((prev: any, fn: UnaryFunction) => fn(prev), input as any);\n };\n}\n", "import { Operator } from './Operator';\nimport { SafeSubscriber, Subscriber } from './Subscriber';\nimport { isSubscription, Subscription } from './Subscription';\nimport { TeardownLogic, OperatorFunction, Subscribable, Observer } from './types';\nimport { observable as Symbol_observable } from './symbol/observable';\nimport { pipeFromArray } from './util/pipe';\nimport { config } from './config';\nimport { isFunction } from './util/isFunction';\nimport { errorContext } from './util/errorContext';\n\n/**\n * A representation of any set of values over any amount of time. This is the most basic building block\n * of RxJS.\n *\n * @class Observable\n */\nexport class Observable implements Subscribable {\n /**\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n */\n source: Observable | undefined;\n\n /**\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n */\n operator: Operator | undefined;\n\n /**\n * @constructor\n * @param {Function} subscribe the function that is called when the Observable is\n * initially subscribed to. 
This function is given a Subscriber, to which new values\n * can be `next`ed, or an `error` method can be called to raise an error, or\n * `complete` can be called to notify of a successful completion.\n */\n constructor(subscribe?: (this: Observable, subscriber: Subscriber) => TeardownLogic) {\n if (subscribe) {\n this._subscribe = subscribe;\n }\n }\n\n // HACK: Since TypeScript inherits static properties too, we have to\n // fight against TypeScript here so Subject can have a different static create signature\n /**\n * Creates a new Observable by calling the Observable constructor\n * @owner Observable\n * @method create\n * @param {Function} subscribe? the subscriber function to be passed to the Observable constructor\n * @return {Observable} a new observable\n * @nocollapse\n * @deprecated Use `new Observable()` instead. Will be removed in v8.\n */\n static create: (...args: any[]) => any = (subscribe?: (subscriber: Subscriber) => TeardownLogic) => {\n return new Observable(subscribe);\n };\n\n /**\n * Creates a new Observable, with this Observable instance as the source, and the passed\n * operator defined as the new observable's operator.\n * @method lift\n * @param operator the operator defining the operation to take on the observable\n * @return a new observable with the Operator applied\n * @deprecated Internal implementation detail, do not use directly. Will be made internal in v8.\n * If you have implemented an operator using `lift`, it is recommended that you create an\n * operator by simply returning `new Observable()` directly. 
See \"Creating new operators from\n * scratch\" section here: https://rxjs.dev/guide/operators\n */\n lift(operator?: Operator): Observable {\n const observable = new Observable();\n observable.source = this;\n observable.operator = operator;\n return observable;\n }\n\n subscribe(observerOrNext?: Partial> | ((value: T) => void)): Subscription;\n /** @deprecated Instead of passing separate callback arguments, use an observer argument. Signatures taking separate callback arguments will be removed in v8. Details: https://rxjs.dev/deprecations/subscribe-arguments */\n subscribe(next?: ((value: T) => void) | null, error?: ((error: any) => void) | null, complete?: (() => void) | null): Subscription;\n /**\n * Invokes an execution of an Observable and registers Observer handlers for notifications it will emit.\n *\n * Use it when you have all these Observables, but still nothing is happening.\n *\n * `subscribe` is not a regular operator, but a method that calls Observable's internal `subscribe` function. It\n * might be for example a function that you passed to Observable's constructor, but most of the time it is\n * a library implementation, which defines what will be emitted by an Observable, and when it be will emitted. This means\n * that calling `subscribe` is actually the moment when Observable starts its work, not when it is created, as it is often\n * the thought.\n *\n * Apart from starting the execution of an Observable, this method allows you to listen for values\n * that an Observable emits, as well as for when it completes or errors. You can achieve this in two\n * of the following ways.\n *\n * The first way is creating an object that implements {@link Observer} interface. It should have methods\n * defined by that interface, but note that it should be just a regular JavaScript object, which you can create\n * yourself in any way you want (ES6 class, classic function constructor, object literal etc.). 
In particular, do\n * not attempt to use any RxJS implementation details to create Observers - you don't need them. Remember also\n * that your object does not have to implement all methods. If you find yourself creating a method that doesn't\n * do anything, you can simply omit it. Note however, if the `error` method is not provided and an error happens,\n * it will be thrown asynchronously. Errors thrown asynchronously cannot be caught using `try`/`catch`. Instead,\n * use the {@link onUnhandledError} configuration option or use a runtime handler (like `window.onerror` or\n * `process.on('error)`) to be notified of unhandled errors. Because of this, it's recommended that you provide\n * an `error` method to avoid missing thrown errors.\n *\n * The second way is to give up on Observer object altogether and simply provide callback functions in place of its methods.\n * This means you can provide three functions as arguments to `subscribe`, where the first function is equivalent\n * of a `next` method, the second of an `error` method and the third of a `complete` method. Just as in case of an Observer,\n * if you do not need to listen for something, you can omit a function by passing `undefined` or `null`,\n * since `subscribe` recognizes these functions by where they were placed in function call. When it comes\n * to the `error` function, as with an Observer, if not provided, errors emitted by an Observable will be thrown asynchronously.\n *\n * You can, however, subscribe with no parameters at all. This may be the case where you're not interested in terminal events\n * and you also handled emissions internally by using operators (e.g. using `tap`).\n *\n * Whichever style of calling `subscribe` you use, in both cases it returns a Subscription object.\n * This object allows you to call `unsubscribe` on it, which in turn will stop the work that an Observable does and will clean\n * up all resources that an Observable used. 
Note that cancelling a subscription will not call `complete` callback\n * provided to `subscribe` function, which is reserved for a regular completion signal that comes from an Observable.\n *\n * Remember that callbacks provided to `subscribe` are not guaranteed to be called asynchronously.\n * It is an Observable itself that decides when these functions will be called. For example {@link of}\n * by default emits all its values synchronously. Always check documentation for how given Observable\n * will behave when subscribed and if its default behavior can be modified with a `scheduler`.\n *\n * #### Examples\n *\n * Subscribe with an {@link guide/observer Observer}\n *\n * ```ts\n * import { of } from 'rxjs';\n *\n * const sumObserver = {\n * sum: 0,\n * next(value) {\n * console.log('Adding: ' + value);\n * this.sum = this.sum + value;\n * },\n * error() {\n * // We actually could just remove this method,\n * // since we do not really care about errors right now.\n * },\n * complete() {\n * console.log('Sum equals: ' + this.sum);\n * }\n * };\n *\n * of(1, 2, 3) // Synchronously emits 1, 2, 3 and then completes.\n * .subscribe(sumObserver);\n *\n * // Logs:\n * // 'Adding: 1'\n * // 'Adding: 2'\n * // 'Adding: 3'\n * // 'Sum equals: 6'\n * ```\n *\n * Subscribe with functions ({@link deprecations/subscribe-arguments deprecated})\n *\n * ```ts\n * import { of } from 'rxjs'\n *\n * let sum = 0;\n *\n * of(1, 2, 3).subscribe(\n * value => {\n * console.log('Adding: ' + value);\n * sum = sum + value;\n * },\n * undefined,\n * () => console.log('Sum equals: ' + sum)\n * );\n *\n * // Logs:\n * // 'Adding: 1'\n * // 'Adding: 2'\n * // 'Adding: 3'\n * // 'Sum equals: 6'\n * ```\n *\n * Cancel a subscription\n *\n * ```ts\n * import { interval } from 'rxjs';\n *\n * const subscription = interval(1000).subscribe({\n * next(num) {\n * console.log(num)\n * },\n * complete() {\n * // Will not be called, even when cancelling subscription.\n * console.log('completed!');\n * 
}\n * });\n *\n * setTimeout(() => {\n * subscription.unsubscribe();\n * console.log('unsubscribed!');\n * }, 2500);\n *\n * // Logs:\n * // 0 after 1s\n * // 1 after 2s\n * // 'unsubscribed!' after 2.5s\n * ```\n *\n * @param {Observer|Function} observerOrNext (optional) Either an observer with methods to be called,\n * or the first of three possible handlers, which is the handler for each value emitted from the subscribed\n * Observable.\n * @param {Function} error (optional) A handler for a terminal event resulting from an error. If no error handler is provided,\n * the error will be thrown asynchronously as unhandled.\n * @param {Function} complete (optional) A handler for a terminal event resulting from successful completion.\n * @return {Subscription} a subscription reference to the registered handlers\n * @method subscribe\n */\n subscribe(\n observerOrNext?: Partial> | ((value: T) => void) | null,\n error?: ((error: any) => void) | null,\n complete?: (() => void) | null\n ): Subscription {\n const subscriber = isSubscriber(observerOrNext) ? observerOrNext : new SafeSubscriber(observerOrNext, error, complete);\n\n errorContext(() => {\n const { operator, source } = this;\n subscriber.add(\n operator\n ? // We're dealing with a subscription in the\n // operator chain to one of our lifted operators.\n operator.call(subscriber, source)\n : source\n ? // If `source` has a value, but `operator` does not, something that\n // had intimate knowledge of our API, like our `Subject`, must have\n // set it. 
We're going to just call `_subscribe` directly.\n this._subscribe(subscriber)\n : // In all other cases, we're likely wrapping a user-provided initializer\n // function, so we need to catch errors and handle them appropriately.\n this._trySubscribe(subscriber)\n );\n });\n\n return subscriber;\n }\n\n /** @internal */\n protected _trySubscribe(sink: Subscriber): TeardownLogic {\n try {\n return this._subscribe(sink);\n } catch (err) {\n // We don't need to return anything in this case,\n // because it's just going to try to `add()` to a subscription\n // above.\n sink.error(err);\n }\n }\n\n /**\n * Used as a NON-CANCELLABLE means of subscribing to an observable, for use with\n * APIs that expect promises, like `async/await`. You cannot unsubscribe from this.\n *\n * **WARNING**: Only use this with observables you *know* will complete. If the source\n * observable does not complete, you will end up with a promise that is hung up, and\n * potentially all of the state of an async function hanging out in memory. 
To avoid\n * this situation, look into adding something like {@link timeout}, {@link take},\n * {@link takeWhile}, or {@link takeUntil} amongst others.\n *\n * #### Example\n *\n * ```ts\n * import { interval, take } from 'rxjs';\n *\n * const source$ = interval(1000).pipe(take(4));\n *\n * async function getTotal() {\n * let total = 0;\n *\n * await source$.forEach(value => {\n * total += value;\n * console.log('observable -> ' + value);\n * });\n *\n * return total;\n * }\n *\n * getTotal().then(\n * total => console.log('Total: ' + total)\n * );\n *\n * // Expected:\n * // 'observable -> 0'\n * // 'observable -> 1'\n * // 'observable -> 2'\n * // 'observable -> 3'\n * // 'Total: 6'\n * ```\n *\n * @param next a handler for each value emitted by the observable\n * @return a promise that either resolves on observable completion or\n * rejects with the handled error\n */\n forEach(next: (value: T) => void): Promise;\n\n /**\n * @param next a handler for each value emitted by the observable\n * @param promiseCtor a constructor function used to instantiate the Promise\n * @return a promise that either resolves on observable completion or\n * rejects with the handled error\n * @deprecated Passing a Promise constructor will no longer be available\n * in upcoming versions of RxJS. This is because it adds weight to the library, for very\n * little benefit. If you need this functionality, it is recommended that you either\n * polyfill Promise, or you create an adapter to convert the returned native promise\n * to whatever promise implementation you wanted. 
Will be removed in v8.\n */\n forEach(next: (value: T) => void, promiseCtor: PromiseConstructorLike): Promise;\n\n forEach(next: (value: T) => void, promiseCtor?: PromiseConstructorLike): Promise {\n promiseCtor = getPromiseCtor(promiseCtor);\n\n return new promiseCtor((resolve, reject) => {\n const subscriber = new SafeSubscriber({\n next: (value) => {\n try {\n next(value);\n } catch (err) {\n reject(err);\n subscriber.unsubscribe();\n }\n },\n error: reject,\n complete: resolve,\n });\n this.subscribe(subscriber);\n }) as Promise;\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): TeardownLogic {\n return this.source?.subscribe(subscriber);\n }\n\n /**\n * An interop point defined by the es7-observable spec https://github.com/zenparsing/es-observable\n * @method Symbol.observable\n * @return {Observable} this instance of the observable\n */\n [Symbol_observable]() {\n return this;\n }\n\n /* tslint:disable:max-line-length */\n pipe(): Observable;\n pipe(op1: OperatorFunction): Observable;\n pipe(op1: OperatorFunction, op2: OperatorFunction): Observable;\n pipe(op1: OperatorFunction, op2: OperatorFunction, op3: OperatorFunction): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: 
OperatorFunction,\n op8: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction,\n op9: OperatorFunction\n ): Observable;\n pipe(\n op1: OperatorFunction,\n op2: OperatorFunction,\n op3: OperatorFunction,\n op4: OperatorFunction,\n op5: OperatorFunction,\n op6: OperatorFunction,\n op7: OperatorFunction,\n op8: OperatorFunction,\n op9: OperatorFunction,\n ...operations: OperatorFunction[]\n ): Observable;\n /* tslint:enable:max-line-length */\n\n /**\n * Used to stitch together functional operators into a chain.\n * @method pipe\n * @return {Observable} the Observable result of all of the operators having\n * been called in the order they were passed in.\n *\n * ## Example\n *\n * ```ts\n * import { interval, filter, map, scan } from 'rxjs';\n *\n * interval(1000)\n * .pipe(\n * filter(x => x % 2 === 0),\n * map(x => x + x),\n * scan((acc, x) => acc + x)\n * )\n * .subscribe(x => console.log(x));\n * ```\n */\n pipe(...operations: OperatorFunction[]): Observable {\n return pipeFromArray(operations)(this);\n }\n\n /* tslint:disable:max-line-length */\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(): Promise;\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(PromiseCtor: typeof Promise): Promise;\n /** @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. 
Details: https://rxjs.dev/deprecations/to-promise */\n toPromise(PromiseCtor: PromiseConstructorLike): Promise;\n /* tslint:enable:max-line-length */\n\n /**\n * Subscribe to this Observable and get a Promise resolving on\n * `complete` with the last emission (if any).\n *\n * **WARNING**: Only use this with observables you *know* will complete. If the source\n * observable does not complete, you will end up with a promise that is hung up, and\n * potentially all of the state of an async function hanging out in memory. To avoid\n * this situation, look into adding something like {@link timeout}, {@link take},\n * {@link takeWhile}, or {@link takeUntil} amongst others.\n *\n * @method toPromise\n * @param [promiseCtor] a constructor function used to instantiate\n * the Promise\n * @return A Promise that resolves with the last value emit, or\n * rejects on an error. If there were no emissions, Promise\n * resolves with undefined.\n * @deprecated Replaced with {@link firstValueFrom} and {@link lastValueFrom}. Will be removed in v8. Details: https://rxjs.dev/deprecations/to-promise\n */\n toPromise(promiseCtor?: PromiseConstructorLike): Promise {\n promiseCtor = getPromiseCtor(promiseCtor);\n\n return new promiseCtor((resolve, reject) => {\n let value: T | undefined;\n this.subscribe(\n (x: T) => (value = x),\n (err: any) => reject(err),\n () => resolve(value)\n );\n }) as Promise;\n }\n}\n\n/**\n * Decides between a passed promise constructor from consuming code,\n * A default configured promise constructor, and the native promise\n * constructor and returns it. If nothing can be found, it will throw\n * an error.\n * @param promiseCtor The optional promise constructor to passed by consuming code\n */\nfunction getPromiseCtor(promiseCtor: PromiseConstructorLike | undefined) {\n return promiseCtor ?? config.Promise ?? 
Promise;\n}\n\nfunction isObserver(value: any): value is Observer {\n return value && isFunction(value.next) && isFunction(value.error) && isFunction(value.complete);\n}\n\nfunction isSubscriber(value: any): value is Subscriber {\n return (value && value instanceof Subscriber) || (isObserver(value) && isSubscription(value));\n}\n", "import { Observable } from '../Observable';\nimport { Subscriber } from '../Subscriber';\nimport { OperatorFunction } from '../types';\nimport { isFunction } from './isFunction';\n\n/**\n * Used to determine if an object is an Observable with a lift function.\n */\nexport function hasLift(source: any): source is { lift: InstanceType['lift'] } {\n return isFunction(source?.lift);\n}\n\n/**\n * Creates an `OperatorFunction`. Used to define operators throughout the library in a concise way.\n * @param init The logic to connect the liftedSource to the subscriber at the moment of subscription.\n */\nexport function operate(\n init: (liftedSource: Observable, subscriber: Subscriber) => (() => void) | void\n): OperatorFunction {\n return (source: Observable) => {\n if (hasLift(source)) {\n return source.lift(function (this: Subscriber, liftedSource: Observable) {\n try {\n return init(liftedSource, this);\n } catch (err) {\n this.error(err);\n }\n });\n }\n throw new TypeError('Unable to lift unknown Observable type');\n };\n}\n", "import { Subscriber } from '../Subscriber';\n\n/**\n * Creates an instance of an `OperatorSubscriber`.\n * @param destination The downstream subscriber.\n * @param onNext Handles next values, only called if this subscriber is not stopped or closed. Any\n * error that occurs in this function is caught and sent to the `error` method of this subscriber.\n * @param onError Handles errors from the subscription, any errors that occur in this handler are caught\n * and send to the `destination` error handler.\n * @param onComplete Handles completion notification from the subscription. 
Any errors that occur in\n * this handler are sent to the `destination` error handler.\n * @param onFinalize Additional teardown logic here. This will only be called on teardown if the\n * subscriber itself is not already closed. This is called after all other teardown logic is executed.\n */\nexport function createOperatorSubscriber(\n destination: Subscriber,\n onNext?: (value: T) => void,\n onComplete?: () => void,\n onError?: (err: any) => void,\n onFinalize?: () => void\n): Subscriber {\n return new OperatorSubscriber(destination, onNext, onComplete, onError, onFinalize);\n}\n\n/**\n * A generic helper for allowing operators to be created with a Subscriber and\n * use closures to capture necessary state from the operator function itself.\n */\nexport class OperatorSubscriber extends Subscriber {\n /**\n * Creates an instance of an `OperatorSubscriber`.\n * @param destination The downstream subscriber.\n * @param onNext Handles next values, only called if this subscriber is not stopped or closed. Any\n * error that occurs in this function is caught and sent to the `error` method of this subscriber.\n * @param onError Handles errors from the subscription, any errors that occur in this handler are caught\n * and send to the `destination` error handler.\n * @param onComplete Handles completion notification from the subscription. Any errors that occur in\n * this handler are sent to the `destination` error handler.\n * @param onFinalize Additional finalization logic here. This will only be called on finalization if the\n * subscriber itself is not already closed. 
This is called after all other finalization logic is executed.\n * @param shouldUnsubscribe An optional check to see if an unsubscribe call should truly unsubscribe.\n * NOTE: This currently **ONLY** exists to support the strange behavior of {@link groupBy}, where unsubscription\n * to the resulting observable does not actually disconnect from the source if there are active subscriptions\n * to any grouped observable. (DO NOT EXPOSE OR USE EXTERNALLY!!!)\n */\n constructor(\n destination: Subscriber,\n onNext?: (value: T) => void,\n onComplete?: () => void,\n onError?: (err: any) => void,\n private onFinalize?: () => void,\n private shouldUnsubscribe?: () => boolean\n ) {\n // It's important - for performance reasons - that all of this class's\n // members are initialized and that they are always initialized in the same\n // order. This will ensure that all OperatorSubscriber instances have the\n // same hidden class in V8. This, in turn, will help keep the number of\n // hidden classes involved in property accesses within the base class as\n // low as possible. If the number of hidden classes involved exceeds four,\n // the property accesses will become megamorphic and performance penalties\n // will be incurred - i.e. inline caches won't be used.\n //\n // The reasons for ensuring all instances have the same hidden class are\n // further discussed in this blog post from Benedikt Meurer:\n // https://benediktmeurer.de/2018/03/23/impact-of-polymorphism-on-component-based-frameworks-like-react/\n super(destination);\n this._next = onNext\n ? function (this: OperatorSubscriber, value: T) {\n try {\n onNext(value);\n } catch (err) {\n destination.error(err);\n }\n }\n : super._next;\n this._error = onError\n ? 
function (this: OperatorSubscriber, err: any) {\n try {\n onError(err);\n } catch (err) {\n // Send any errors that occur down stream.\n destination.error(err);\n } finally {\n // Ensure finalization.\n this.unsubscribe();\n }\n }\n : super._error;\n this._complete = onComplete\n ? function (this: OperatorSubscriber) {\n try {\n onComplete();\n } catch (err) {\n // Send any errors that occur down stream.\n destination.error(err);\n } finally {\n // Ensure finalization.\n this.unsubscribe();\n }\n }\n : super._complete;\n }\n\n unsubscribe() {\n if (!this.shouldUnsubscribe || this.shouldUnsubscribe()) {\n const { closed } = this;\n super.unsubscribe();\n // Execute additional teardown if we have any and we didn't already do so.\n !closed && this.onFinalize?.();\n }\n }\n}\n", "import { Subscription } from '../Subscription';\n\ninterface AnimationFrameProvider {\n schedule(callback: FrameRequestCallback): Subscription;\n requestAnimationFrame: typeof requestAnimationFrame;\n cancelAnimationFrame: typeof cancelAnimationFrame;\n delegate:\n | {\n requestAnimationFrame: typeof requestAnimationFrame;\n cancelAnimationFrame: typeof cancelAnimationFrame;\n }\n | undefined;\n}\n\nexport const animationFrameProvider: AnimationFrameProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n schedule(callback) {\n let request = requestAnimationFrame;\n let cancel: typeof cancelAnimationFrame | undefined = cancelAnimationFrame;\n const { delegate } = animationFrameProvider;\n if (delegate) {\n request = delegate.requestAnimationFrame;\n cancel = delegate.cancelAnimationFrame;\n }\n const handle = request((timestamp) => {\n // Clear the cancel function. 
The request has been fulfilled, so\n // attempting to cancel the request upon unsubscription would be\n // pointless.\n cancel = undefined;\n callback(timestamp);\n });\n return new Subscription(() => cancel?.(handle));\n },\n requestAnimationFrame(...args) {\n const { delegate } = animationFrameProvider;\n return (delegate?.requestAnimationFrame || requestAnimationFrame)(...args);\n },\n cancelAnimationFrame(...args) {\n const { delegate } = animationFrameProvider;\n return (delegate?.cancelAnimationFrame || cancelAnimationFrame)(...args);\n },\n delegate: undefined,\n};\n", "import { createErrorClass } from './createErrorClass';\n\nexport interface ObjectUnsubscribedError extends Error {}\n\nexport interface ObjectUnsubscribedErrorCtor {\n /**\n * @deprecated Internal implementation detail. Do not construct error instances.\n * Cannot be tagged as internal: https://github.com/ReactiveX/rxjs/issues/6269\n */\n new (): ObjectUnsubscribedError;\n}\n\n/**\n * An error thrown when an action is invalid because the object has been\n * unsubscribed.\n *\n * @see {@link Subject}\n * @see {@link BehaviorSubject}\n *\n * @class ObjectUnsubscribedError\n */\nexport const ObjectUnsubscribedError: ObjectUnsubscribedErrorCtor = createErrorClass(\n (_super) =>\n function ObjectUnsubscribedErrorImpl(this: any) {\n _super(this);\n this.name = 'ObjectUnsubscribedError';\n this.message = 'object unsubscribed';\n }\n);\n", "import { Operator } from './Operator';\nimport { Observable } from './Observable';\nimport { Subscriber } from './Subscriber';\nimport { Subscription, EMPTY_SUBSCRIPTION } from './Subscription';\nimport { Observer, SubscriptionLike, TeardownLogic } from './types';\nimport { ObjectUnsubscribedError } from './util/ObjectUnsubscribedError';\nimport { arrRemove } from './util/arrRemove';\nimport { errorContext } from './util/errorContext';\n\n/**\n * A Subject is a special type of Observable that allows values to be\n * multicasted to many Observers. 
Subjects are like EventEmitters.\n *\n * Every Subject is an Observable and an Observer. You can subscribe to a\n * Subject, and you can call next to feed values as well as error and complete.\n */\nexport class Subject extends Observable implements SubscriptionLike {\n closed = false;\n\n private currentObservers: Observer[] | null = null;\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n observers: Observer[] = [];\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n isStopped = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n hasError = false;\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n thrownError: any = null;\n\n /**\n * Creates a \"subject\" by basically gluing an observer to an observable.\n *\n * @nocollapse\n * @deprecated Recommended you do not use. Will be removed at some point in the future. Plans for replacement still under discussion.\n */\n static create: (...args: any[]) => any = (destination: Observer, source: Observable): AnonymousSubject => {\n return new AnonymousSubject(destination, source);\n };\n\n constructor() {\n // NOTE: This must be here to obscure Observable's constructor.\n super();\n }\n\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. 
*/\n lift(operator: Operator): Observable {\n const subject = new AnonymousSubject(this, this);\n subject.operator = operator as any;\n return subject as any;\n }\n\n /** @internal */\n protected _throwIfClosed() {\n if (this.closed) {\n throw new ObjectUnsubscribedError();\n }\n }\n\n next(value: T) {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n if (!this.currentObservers) {\n this.currentObservers = Array.from(this.observers);\n }\n for (const observer of this.currentObservers) {\n observer.next(value);\n }\n }\n });\n }\n\n error(err: any) {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n this.hasError = this.isStopped = true;\n this.thrownError = err;\n const { observers } = this;\n while (observers.length) {\n observers.shift()!.error(err);\n }\n }\n });\n }\n\n complete() {\n errorContext(() => {\n this._throwIfClosed();\n if (!this.isStopped) {\n this.isStopped = true;\n const { observers } = this;\n while (observers.length) {\n observers.shift()!.complete();\n }\n }\n });\n }\n\n unsubscribe() {\n this.isStopped = this.closed = true;\n this.observers = this.currentObservers = null!;\n }\n\n get observed() {\n return this.observers?.length > 0;\n }\n\n /** @internal */\n protected _trySubscribe(subscriber: Subscriber): TeardownLogic {\n this._throwIfClosed();\n return super._trySubscribe(subscriber);\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n this._throwIfClosed();\n this._checkFinalizedStatuses(subscriber);\n return this._innerSubscribe(subscriber);\n }\n\n /** @internal */\n protected _innerSubscribe(subscriber: Subscriber) {\n const { hasError, isStopped, observers } = this;\n if (hasError || isStopped) {\n return EMPTY_SUBSCRIPTION;\n }\n this.currentObservers = null;\n observers.push(subscriber);\n return new Subscription(() => {\n this.currentObservers = null;\n arrRemove(observers, subscriber);\n });\n }\n\n /** @internal */\n protected 
_checkFinalizedStatuses(subscriber: Subscriber) {\n const { hasError, thrownError, isStopped } = this;\n if (hasError) {\n subscriber.error(thrownError);\n } else if (isStopped) {\n subscriber.complete();\n }\n }\n\n /**\n * Creates a new Observable with this Subject as the source. You can do this\n * to create custom Observer-side logic of the Subject and conceal it from\n * code that uses the Observable.\n * @return {Observable} Observable that the Subject casts to\n */\n asObservable(): Observable {\n const observable: any = new Observable();\n observable.source = this;\n return observable;\n }\n}\n\n/**\n * @class AnonymousSubject\n */\nexport class AnonymousSubject extends Subject {\n constructor(\n /** @deprecated Internal implementation detail, do not use directly. Will be made internal in v8. */\n public destination?: Observer,\n source?: Observable\n ) {\n super();\n this.source = source;\n }\n\n next(value: T) {\n this.destination?.next?.(value);\n }\n\n error(err: any) {\n this.destination?.error?.(err);\n }\n\n complete() {\n this.destination?.complete?.();\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n return this.source?.subscribe(subscriber) ?? 
EMPTY_SUBSCRIPTION;\n }\n}\n", "import { TimestampProvider } from '../types';\n\ninterface DateTimestampProvider extends TimestampProvider {\n delegate: TimestampProvider | undefined;\n}\n\nexport const dateTimestampProvider: DateTimestampProvider = {\n now() {\n // Use the variable rather than `this` so that the function can be called\n // without being bound to the provider.\n return (dateTimestampProvider.delegate || Date).now();\n },\n delegate: undefined,\n};\n", "import { Subject } from './Subject';\nimport { TimestampProvider } from './types';\nimport { Subscriber } from './Subscriber';\nimport { Subscription } from './Subscription';\nimport { dateTimestampProvider } from './scheduler/dateTimestampProvider';\n\n/**\n * A variant of {@link Subject} that \"replays\" old values to new subscribers by emitting them when they first subscribe.\n *\n * `ReplaySubject` has an internal buffer that will store a specified number of values that it has observed. Like `Subject`,\n * `ReplaySubject` \"observes\" values by having them passed to its `next` method. When it observes a value, it will store that\n * value for a time determined by the configuration of the `ReplaySubject`, as passed to its constructor.\n *\n * When a new subscriber subscribes to the `ReplaySubject` instance, it will synchronously emit all values in its buffer in\n * a First-In-First-Out (FIFO) manner. The `ReplaySubject` will also complete, if it has observed completion; and it will\n * error if it has observed an error.\n *\n * There are two main configuration items to be concerned with:\n *\n * 1. `bufferSize` - This will determine how many items are stored in the buffer, defaults to infinite.\n * 2. `windowTime` - The amount of time to hold a value in the buffer before removing it from the buffer.\n *\n * Both configurations may exist simultaneously. 
So if you would like to buffer a maximum of 3 values, as long as the values\n * are less than 2 seconds old, you could do so with a `new ReplaySubject(3, 2000)`.\n *\n * ### Differences with BehaviorSubject\n *\n * `BehaviorSubject` is similar to `new ReplaySubject(1)`, with a couple of exceptions:\n *\n * 1. `BehaviorSubject` comes \"primed\" with a single value upon construction.\n * 2. `ReplaySubject` will replay values, even after observing an error, where `BehaviorSubject` will not.\n *\n * @see {@link Subject}\n * @see {@link BehaviorSubject}\n * @see {@link shareReplay}\n */\nexport class ReplaySubject extends Subject {\n private _buffer: (T | number)[] = [];\n private _infiniteTimeWindow = true;\n\n /**\n * @param bufferSize The size of the buffer to replay on subscription\n * @param windowTime The amount of time the buffered items will stay buffered\n * @param timestampProvider An object with a `now()` method that provides the current timestamp. This is used to\n * calculate the amount of time something has been buffered.\n */\n constructor(\n private _bufferSize = Infinity,\n private _windowTime = Infinity,\n private _timestampProvider: TimestampProvider = dateTimestampProvider\n ) {\n super();\n this._infiniteTimeWindow = _windowTime === Infinity;\n this._bufferSize = Math.max(1, _bufferSize);\n this._windowTime = Math.max(1, _windowTime);\n }\n\n next(value: T): void {\n const { isStopped, _buffer, _infiniteTimeWindow, _timestampProvider, _windowTime } = this;\n if (!isStopped) {\n _buffer.push(value);\n !_infiniteTimeWindow && _buffer.push(_timestampProvider.now() + _windowTime);\n }\n this._trimBuffer();\n super.next(value);\n }\n\n /** @internal */\n protected _subscribe(subscriber: Subscriber): Subscription {\n this._throwIfClosed();\n this._trimBuffer();\n\n const subscription = this._innerSubscribe(subscriber);\n\n const { _infiniteTimeWindow, _buffer } = this;\n // We use a copy here, so reentrant code does not mutate our array while we're\n // 
emitting it to a new subscriber.\n const copy = _buffer.slice();\n for (let i = 0; i < copy.length && !subscriber.closed; i += _infiniteTimeWindow ? 1 : 2) {\n subscriber.next(copy[i] as T);\n }\n\n this._checkFinalizedStatuses(subscriber);\n\n return subscription;\n }\n\n private _trimBuffer() {\n const { _bufferSize, _timestampProvider, _buffer, _infiniteTimeWindow } = this;\n // If we don't have an infinite buffer size, and we're over the length,\n // use splice to truncate the old buffer values off. Note that we have to\n // double the size for instances where we're not using an infinite time window\n // because we're storing the values and the timestamps in the same array.\n const adjustedBufferSize = (_infiniteTimeWindow ? 1 : 2) * _bufferSize;\n _bufferSize < Infinity && adjustedBufferSize < _buffer.length && _buffer.splice(0, _buffer.length - adjustedBufferSize);\n\n // Now, if we're not in an infinite time window, remove all values where the time is\n // older than what is allowed.\n if (!_infiniteTimeWindow) {\n const now = _timestampProvider.now();\n let last = 0;\n // Search the array for the first timestamp that isn't expired and\n // truncate the buffer up to that point.\n for (let i = 1; i < _buffer.length && (_buffer[i] as number) <= now; i += 2) {\n last = i;\n }\n last && _buffer.splice(0, last + 1);\n }\n }\n}\n", "import { Scheduler } from '../Scheduler';\nimport { Subscription } from '../Subscription';\nimport { SchedulerAction } from '../types';\n\n/**\n * A unit of work to be executed in a `scheduler`. 
An action is typically\n * created from within a {@link SchedulerLike} and an RxJS user does not need to concern\n * themselves about creating and manipulating an Action.\n *\n * ```ts\n * class Action extends Subscription {\n * new (scheduler: Scheduler, work: (state?: T) => void);\n * schedule(state?: T, delay: number = 0): Subscription;\n * }\n * ```\n *\n * @class Action\n */\nexport class Action extends Subscription {\n constructor(scheduler: Scheduler, work: (this: SchedulerAction, state?: T) => void) {\n super();\n }\n /**\n * Schedules this action on its parent {@link SchedulerLike} for execution. May be passed\n * some context object, `state`. May happen at some point in the future,\n * according to the `delay` parameter, if specified.\n * @param {T} [state] Some contextual data that the `work` function uses when\n * called by the Scheduler.\n * @param {number} [delay] Time to wait before executing the work, where the\n * time unit is implicit and defined by the Scheduler.\n * @return {void}\n */\n public schedule(state?: T, delay: number = 0): Subscription {\n return this;\n }\n}\n", "import type { TimerHandle } from './timerHandle';\ntype SetIntervalFunction = (handler: () => void, timeout?: number, ...args: any[]) => TimerHandle;\ntype ClearIntervalFunction = (handle: TimerHandle) => void;\n\ninterface IntervalProvider {\n setInterval: SetIntervalFunction;\n clearInterval: ClearIntervalFunction;\n delegate:\n | {\n setInterval: SetIntervalFunction;\n clearInterval: ClearIntervalFunction;\n }\n | undefined;\n}\n\nexport const intervalProvider: IntervalProvider = {\n // When accessing the delegate, use the variable rather than `this` so that\n // the functions can be called without being bound to the provider.\n setInterval(handler: () => void, timeout?: number, ...args) {\n const { delegate } = intervalProvider;\n if (delegate?.setInterval) {\n return delegate.setInterval(handler, timeout, ...args);\n }\n return setInterval(handler, timeout, ...args);\n 
},\n clearInterval(handle) {\n const { delegate } = intervalProvider;\n return (delegate?.clearInterval || clearInterval)(handle as any);\n },\n delegate: undefined,\n};\n", "import { Action } from './Action';\nimport { SchedulerAction } from '../types';\nimport { Subscription } from '../Subscription';\nimport { AsyncScheduler } from './AsyncScheduler';\nimport { intervalProvider } from './intervalProvider';\nimport { arrRemove } from '../util/arrRemove';\nimport { TimerHandle } from './timerHandle';\n\nexport class AsyncAction extends Action {\n public id: TimerHandle | undefined;\n public state?: T;\n // @ts-ignore: Property has no initializer and is not definitely assigned\n public delay: number;\n protected pending: boolean = false;\n\n constructor(protected scheduler: AsyncScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n public schedule(state?: T, delay: number = 0): Subscription {\n if (this.closed) {\n return this;\n }\n\n // Always replace the current state with the new state.\n this.state = state;\n\n const id = this.id;\n const scheduler = this.scheduler;\n\n //\n // Important implementation note:\n //\n // Actions only execute once by default, unless rescheduled from within the\n // scheduled callback. This allows us to implement single and repeat\n // actions via the same code path, without adding API surface area, as well\n // as mimic traditional recursion but across asynchronous boundaries.\n //\n // However, JS runtimes and timers distinguish between intervals achieved by\n // serial `setTimeout` calls vs. a single `setInterval` call. An interval of\n // serial `setTimeout` calls can be individually delayed, which delays\n // scheduling the next `setTimeout`, and so on. 
`setInterval` attempts to\n // guarantee the interval callback will be invoked more precisely to the\n // interval period, regardless of load.\n //\n // Therefore, we use `setInterval` to schedule single and repeat actions.\n // If the action reschedules itself with the same delay, the interval is not\n // canceled. If the action doesn't reschedule, or reschedules with a\n // different delay, the interval will be canceled after scheduled callback\n // execution.\n //\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, delay);\n }\n\n // Set the pending flag indicating that this action has been scheduled, or\n // has recursively rescheduled itself.\n this.pending = true;\n\n this.delay = delay;\n // If this action has already an async Id, don't request a new one.\n this.id = this.id ?? this.requestAsyncId(scheduler, this.id, delay);\n\n return this;\n }\n\n protected requestAsyncId(scheduler: AsyncScheduler, _id?: TimerHandle, delay: number = 0): TimerHandle {\n return intervalProvider.setInterval(scheduler.flush.bind(scheduler, this), delay);\n }\n\n protected recycleAsyncId(_scheduler: AsyncScheduler, id?: TimerHandle, delay: number | null = 0): TimerHandle | undefined {\n // If this action is rescheduled with the same delay time, don't clear the interval id.\n if (delay != null && this.delay === delay && this.pending === false) {\n return id;\n }\n // Otherwise, if the action's delay time is different from the current delay,\n // or the action has been rescheduled before it's executed, clear the interval id\n if (id != null) {\n intervalProvider.clearInterval(id);\n }\n\n return undefined;\n }\n\n /**\n * Immediately executes this action and the `work` it contains.\n * @return {any}\n */\n public execute(state: T, delay: number): any {\n if (this.closed) {\n return new Error('executing a cancelled action');\n }\n\n this.pending = false;\n const error = this._execute(state, delay);\n if (error) {\n return error;\n } else if (this.pending === false 
&& this.id != null) {\n // Dequeue if the action didn't reschedule itself. Don't call\n // unsubscribe(), because the action could reschedule later.\n // For example:\n // ```\n // scheduler.schedule(function doWork(counter) {\n // /* ... I'm a busy worker bee ... */\n // var originalAction = this;\n // /* wait 100ms before rescheduling the action */\n // setTimeout(function () {\n // originalAction.schedule(counter + 1);\n // }, 100);\n // }, 1000);\n // ```\n this.id = this.recycleAsyncId(this.scheduler, this.id, null);\n }\n }\n\n protected _execute(state: T, _delay: number): any {\n let errored: boolean = false;\n let errorValue: any;\n try {\n this.work(state);\n } catch (e) {\n errored = true;\n // HACK: Since code elsewhere is relying on the \"truthiness\" of the\n // return here, we can't have it return \"\" or 0 or false.\n // TODO: Clean this up when we refactor schedulers mid-version-8 or so.\n errorValue = e ? e : new Error('Scheduled action threw falsy error');\n }\n if (errored) {\n this.unsubscribe();\n return errorValue;\n }\n }\n\n unsubscribe() {\n if (!this.closed) {\n const { id, scheduler } = this;\n const { actions } = scheduler;\n\n this.work = this.state = this.scheduler = null!;\n this.pending = false;\n\n arrRemove(actions, this);\n if (id != null) {\n this.id = this.recycleAsyncId(scheduler, id, null);\n }\n\n this.delay = null!;\n super.unsubscribe();\n }\n }\n}\n", "import { Action } from './scheduler/Action';\nimport { Subscription } from './Subscription';\nimport { SchedulerLike, SchedulerAction } from './types';\nimport { dateTimestampProvider } from './scheduler/dateTimestampProvider';\n\n/**\n * An execution context and a data structure to order tasks and schedule their\n * execution. 
Provides a notion of (potentially virtual) time, through the\n * `now()` getter method.\n *\n * Each unit of work in a Scheduler is called an `Action`.\n *\n * ```ts\n * class Scheduler {\n * now(): number;\n * schedule(work, delay?, state?): Subscription;\n * }\n * ```\n *\n * @class Scheduler\n * @deprecated Scheduler is an internal implementation detail of RxJS, and\n * should not be used directly. Rather, create your own class and implement\n * {@link SchedulerLike}. Will be made internal in v8.\n */\nexport class Scheduler implements SchedulerLike {\n public static now: () => number = dateTimestampProvider.now;\n\n constructor(private schedulerActionCtor: typeof Action, now: () => number = Scheduler.now) {\n this.now = now;\n }\n\n /**\n * A getter method that returns a number representing the current time\n * (at the time this function was called) according to the scheduler's own\n * internal clock.\n * @return {number} A number that represents the current time. May or may not\n * have a relation to wall-clock time. May or may not refer to a time unit\n * (e.g. milliseconds).\n */\n public now: () => number;\n\n /**\n * Schedules a function, `work`, for execution. May happen at some point in\n * the future, according to the `delay` parameter, if specified. 
May be passed\n * some context object, `state`, which will be passed to the `work` function.\n *\n * The given arguments will be processed an stored as an Action object in a\n * queue of actions.\n *\n * @param {function(state: ?T): ?Subscription} work A function representing a\n * task, or some unit of work to be executed by the Scheduler.\n * @param {number} [delay] Time to wait before executing the work, where the\n * time unit is implicit and defined by the Scheduler itself.\n * @param {T} [state] Some contextual data that the `work` function uses when\n * called by the Scheduler.\n * @return {Subscription} A subscription in order to be able to unsubscribe\n * the scheduled work.\n */\n public schedule(work: (this: SchedulerAction, state?: T) => void, delay: number = 0, state?: T): Subscription {\n return new this.schedulerActionCtor(this, work).schedule(state, delay);\n }\n}\n", "import { Scheduler } from '../Scheduler';\nimport { Action } from './Action';\nimport { AsyncAction } from './AsyncAction';\nimport { TimerHandle } from './timerHandle';\n\nexport class AsyncScheduler extends Scheduler {\n public actions: Array> = [];\n /**\n * A flag to indicate whether the Scheduler is currently executing a batch of\n * queued actions.\n * @type {boolean}\n * @internal\n */\n public _active: boolean = false;\n /**\n * An internal ID used to track the latest asynchronous task such as those\n * coming from `setTimeout`, `setInterval`, `requestAnimationFrame`, and\n * others.\n * @type {any}\n * @internal\n */\n public _scheduled: TimerHandle | undefined;\n\n constructor(SchedulerAction: typeof Action, now: () => number = Scheduler.now) {\n super(SchedulerAction, now);\n }\n\n public flush(action: AsyncAction): void {\n const { actions } = this;\n\n if (this._active) {\n actions.push(action);\n return;\n }\n\n let error: any;\n this._active = true;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = 
actions.shift()!)); // exhaust the scheduler queue\n\n this._active = false;\n\n if (error) {\n while ((action = actions.shift()!)) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from './AsyncScheduler';\n\n/**\n *\n * Async Scheduler\n *\n * Schedule task as if you used setTimeout(task, duration)\n *\n * `async` scheduler schedules tasks asynchronously, by putting them on the JavaScript\n * event loop queue. It is best used to delay tasks in time or to schedule tasks repeating\n * in intervals.\n *\n * If you just want to \"defer\" task, that is to perform it right after currently\n * executing synchronous code ends (commonly achieved by `setTimeout(deferredTask, 0)`),\n * better choice will be the {@link asapScheduler} scheduler.\n *\n * ## Examples\n * Use async scheduler to delay task\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * const task = () => console.log('it works!');\n *\n * asyncScheduler.schedule(task, 2000);\n *\n * // After 2 seconds logs:\n * // \"it works!\"\n * ```\n *\n * Use async scheduler to repeat task in intervals\n * ```ts\n * import { asyncScheduler } from 'rxjs';\n *\n * function task(state) {\n * console.log(state);\n * this.schedule(state + 1, 1000); // `this` references currently executing Action,\n * // which we reschedule with new state and delay\n * }\n *\n * asyncScheduler.schedule(task, 3000, 0);\n *\n * // Logs:\n * // 0 after 3s\n * // 1 after 4s\n * // 2 after 5s\n * // 3 after 6s\n * ```\n */\n\nexport const asyncScheduler = new AsyncScheduler(AsyncAction);\n\n/**\n * @deprecated Renamed to {@link asyncScheduler}. 
Will be removed in v8.\n */\nexport const async = asyncScheduler;\n", "import { AsyncAction } from './AsyncAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\nimport { SchedulerAction } from '../types';\nimport { animationFrameProvider } from './animationFrameProvider';\nimport { TimerHandle } from './timerHandle';\n\nexport class AnimationFrameAction extends AsyncAction {\n constructor(protected scheduler: AnimationFrameScheduler, protected work: (this: SchedulerAction, state?: T) => void) {\n super(scheduler, work);\n }\n\n protected requestAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle {\n // If delay is greater than 0, request as an async action.\n if (delay !== null && delay > 0) {\n return super.requestAsyncId(scheduler, id, delay);\n }\n // Push the action to the end of the scheduler queue.\n scheduler.actions.push(this);\n // If an animation frame has already been requested, don't request another\n // one. If an animation frame hasn't been requested yet, request one. Return\n // the current animation frame request id.\n return scheduler._scheduled || (scheduler._scheduled = animationFrameProvider.requestAnimationFrame(() => scheduler.flush(undefined)));\n }\n\n protected recycleAsyncId(scheduler: AnimationFrameScheduler, id?: TimerHandle, delay: number = 0): TimerHandle | undefined {\n // If delay exists and is greater than 0, or if the delay is null (the\n // action wasn't rescheduled) but was originally scheduled as an async\n // action, then recycle as an async action.\n if (delay != null ? 
delay > 0 : this.delay > 0) {\n return super.recycleAsyncId(scheduler, id, delay);\n }\n // If the scheduler queue has no remaining actions with the same async id,\n // cancel the requested animation frame and set the scheduled flag to\n // undefined so the next AnimationFrameAction will request its own.\n const { actions } = scheduler;\n if (id != null && actions[actions.length - 1]?.id !== id) {\n animationFrameProvider.cancelAnimationFrame(id as number);\n scheduler._scheduled = undefined;\n }\n // Return undefined so the action knows to request a new async id if it's rescheduled.\n return undefined;\n }\n}\n", "import { AsyncAction } from './AsyncAction';\nimport { AsyncScheduler } from './AsyncScheduler';\n\nexport class AnimationFrameScheduler extends AsyncScheduler {\n public flush(action?: AsyncAction): void {\n this._active = true;\n // The async id that effects a call to flush is stored in _scheduled.\n // Before executing an action, it's necessary to check the action's async\n // id to determine whether it's supposed to be executed in the current\n // flush.\n // Previous implementations of this method used a count to determine this,\n // but that was unsound, as actions that are unsubscribed - i.e. 
cancelled -\n // are removed from the actions array and that can shift actions that are\n // scheduled to be executed in a subsequent flush into positions at which\n // they are executed within the current flush.\n const flushId = this._scheduled;\n this._scheduled = undefined;\n\n const { actions } = this;\n let error: any;\n action = action || actions.shift()!;\n\n do {\n if ((error = action.execute(action.state, action.delay))) {\n break;\n }\n } while ((action = actions[0]) && action.id === flushId && actions.shift());\n\n this._active = false;\n\n if (error) {\n while ((action = actions[0]) && action.id === flushId && actions.shift()) {\n action.unsubscribe();\n }\n throw error;\n }\n }\n}\n", "import { AnimationFrameAction } from './AnimationFrameAction';\nimport { AnimationFrameScheduler } from './AnimationFrameScheduler';\n\n/**\n *\n * Animation Frame Scheduler\n *\n * Perform task when `window.requestAnimationFrame` would fire\n *\n * When `animationFrame` scheduler is used with delay, it will fall back to {@link asyncScheduler} scheduler\n * behaviour.\n *\n * Without delay, `animationFrame` scheduler can be used to create smooth browser animations.\n * It makes sure scheduled task will happen just before next browser content repaint,\n * thus performing animations as efficiently as possible.\n *\n * ## Example\n * Schedule div height animation\n * ```ts\n * // html:
\n * import { animationFrameScheduler } from 'rxjs';\n *\n * const div = document.querySelector('div');\n *\n * animationFrameScheduler.schedule(function(height) {\n * div.style.height = height + \"px\";\n *\n * this.schedule(height + 1); // `this` references currently executing Action,\n * // which we reschedule with new state\n * }, 0, 0);\n *\n * // You will see a div element growing in height\n * ```\n */\n\nexport const animationFrameScheduler = new AnimationFrameScheduler(AnimationFrameAction);\n\n/**\n * @deprecated Renamed to {@link animationFrameScheduler}. Will be removed in v8.\n */\nexport const animationFrame = animationFrameScheduler;\n", "import { Observable } from '../Observable';\nimport { SchedulerLike } from '../types';\n\n/**\n * A simple Observable that emits no items to the Observer and immediately\n * emits a complete notification.\n *\n * Just emits 'complete', and nothing else.\n *\n * ![](empty.png)\n *\n * A simple Observable that only emits the complete notification. It can be used\n * for composing with other Observables, such as in a {@link mergeMap}.\n *\n * ## Examples\n *\n * Log complete notification\n *\n * ```ts\n * import { EMPTY } from 'rxjs';\n *\n * EMPTY.subscribe({\n * next: () => console.log('Next'),\n * complete: () => console.log('Complete!')\n * });\n *\n * // Outputs\n * // Complete!\n * ```\n *\n * Emit the number 7, then complete\n *\n * ```ts\n * import { EMPTY, startWith } from 'rxjs';\n *\n * const result = EMPTY.pipe(startWith(7));\n * result.subscribe(x => console.log(x));\n *\n * // Outputs\n * // 7\n * ```\n *\n * Map and flatten only odd numbers to the sequence `'a'`, `'b'`, `'c'`\n *\n * ```ts\n * import { interval, mergeMap, of, EMPTY } from 'rxjs';\n *\n * const interval$ = interval(1000);\n * const result = interval$.pipe(\n * mergeMap(x => x % 2 === 1 ? 
of('a', 'b', 'c') : EMPTY),\n * );\n * result.subscribe(x => console.log(x));\n *\n * // Results in the following to the console:\n * // x is equal to the count on the interval, e.g. (0, 1, 2, 3, ...)\n * // x will occur every 1000ms\n * // if x % 2 is equal to 1, print a, b, c (each on its own)\n * // if x % 2 is not equal to 1, nothing will be output\n * ```\n *\n * @see {@link Observable}\n * @see {@link NEVER}\n * @see {@link of}\n * @see {@link throwError}\n */\nexport const EMPTY = new Observable((subscriber) => subscriber.complete());\n\n/**\n * @param scheduler A {@link SchedulerLike} to use for scheduling\n * the emission of the complete notification.\n * @deprecated Replaced with the {@link EMPTY} constant or {@link scheduled} (e.g. `scheduled([], scheduler)`). Will be removed in v8.\n */\nexport function empty(scheduler?: SchedulerLike) {\n return scheduler ? emptyScheduled(scheduler) : EMPTY;\n}\n\nfunction emptyScheduled(scheduler: SchedulerLike) {\n return new Observable((subscriber) => scheduler.schedule(() => subscriber.complete()));\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport function isScheduler(value: any): value is SchedulerLike {\n return value && isFunction(value.schedule);\n}\n", "import { SchedulerLike } from '../types';\nimport { isFunction } from './isFunction';\nimport { isScheduler } from './isScheduler';\n\nfunction last(arr: T[]): T | undefined {\n return arr[arr.length - 1];\n}\n\nexport function popResultSelector(args: any[]): ((...args: unknown[]) => unknown) | undefined {\n return isFunction(last(args)) ? args.pop() : undefined;\n}\n\nexport function popScheduler(args: any[]): SchedulerLike | undefined {\n return isScheduler(last(args)) ? args.pop() : undefined;\n}\n\nexport function popNumber(args: any[], defaultValue: number): number {\n return typeof last(args) === 'number' ? args.pop()! 
: defaultValue;\n}\n", "export const isArrayLike = ((x: any): x is ArrayLike => x && typeof x.length === 'number' && typeof x !== 'function');", "import { isFunction } from \"./isFunction\";\n\n/**\n * Tests to see if the object is \"thennable\".\n * @param value the object to test\n */\nexport function isPromise(value: any): value is PromiseLike {\n return isFunction(value?.then);\n}\n", "import { InteropObservable } from '../types';\nimport { observable as Symbol_observable } from '../symbol/observable';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being Observable (but not necessary an Rx Observable) */\nexport function isInteropObservable(input: any): input is InteropObservable {\n return isFunction(input[Symbol_observable]);\n}\n", "import { isFunction } from './isFunction';\n\nexport function isAsyncIterable(obj: any): obj is AsyncIterable {\n return Symbol.asyncIterator && isFunction(obj?.[Symbol.asyncIterator]);\n}\n", "/**\n * Creates the TypeError to throw if an invalid object is passed to `from` or `scheduled`.\n * @param input The object that was passed.\n */\nexport function createInvalidObservableTypeError(input: any) {\n // TODO: We should create error codes that can be looked up, so this can be less verbose.\n return new TypeError(\n `You provided ${\n input !== null && typeof input === 'object' ? 'an invalid object' : `'${input}'`\n } where a stream was expected. 
You can provide an Observable, Promise, ReadableStream, Array, AsyncIterable, or Iterable.`\n );\n}\n", "export function getSymbolIterator(): symbol {\n if (typeof Symbol !== 'function' || !Symbol.iterator) {\n return '@@iterator' as any;\n }\n\n return Symbol.iterator;\n}\n\nexport const iterator = getSymbolIterator();\n", "import { iterator as Symbol_iterator } from '../symbol/iterator';\nimport { isFunction } from './isFunction';\n\n/** Identifies an input as being an Iterable */\nexport function isIterable(input: any): input is Iterable {\n return isFunction(input?.[Symbol_iterator]);\n}\n", "import { ReadableStreamLike } from '../types';\nimport { isFunction } from './isFunction';\n\nexport async function* readableStreamLikeToAsyncGenerator(readableStream: ReadableStreamLike): AsyncGenerator {\n const reader = readableStream.getReader();\n try {\n while (true) {\n const { value, done } = await reader.read();\n if (done) {\n return;\n }\n yield value!;\n }\n } finally {\n reader.releaseLock();\n }\n}\n\nexport function isReadableStreamLike(obj: any): obj is ReadableStreamLike {\n // We don't want to use instanceof checks because they would return\n // false for instances from another Realm, like an