From c7f90a688380707af56f5edfa6e3e4d13377de03 Mon Sep 17 00:00:00 2001 From: Mikhail Sveshnikov Date: Fri, 12 Nov 2021 01:20:45 +0300 Subject: [PATCH] fix dvc download (#109) * fix dvc download * fix lazy field for dataset add gitpython to deps fix dvc downloading * conditional pull * fix isadir error * fix catboost and upload saving * fix tests * fix #83 * fix requirement duplication requirements tests requirement expand * fix test * spelling --- mlem/cli/apply.py | 12 +++- mlem/contrib/catboost.py | 3 + mlem/contrib/dvc.py | 41 +++++++++-- mlem/contrib/lightgbm.py | 13 ++++ mlem/contrib/sklearn.py | 4 +- mlem/contrib/xgboost.py | 12 ++++ mlem/core/artifacts.py | 16 +++-- mlem/core/errors.py | 8 +++ mlem/core/hooks.py | 10 +-- mlem/core/meta_io.py | 2 +- mlem/core/metadata.py | 3 +- mlem/core/model.py | 5 +- mlem/core/objects.py | 18 +++-- mlem/core/requirements.py | 110 ++++++++++++++++++++-------- mlem/polydantic/core.py | 6 +- mlem/polydantic/lazy.py | 41 ++++++++--- mlem/utils/module.py | 9 ++- setup.py | 1 + tests/cli/test_apply.py | 12 ++++ tests/conftest.py | 6 +- tests/contrib/test_catboost.py | 2 +- tests/contrib/test_lightgbm.py | 3 +- tests/contrib/test_sklearn.py | 51 +++++++++++++ tests/contrib/test_xgboost.py | 3 +- tests/core/test_artifacts.py | 22 +++++- tests/core/test_meta_io.py | 3 +- tests/core/test_metadata.py | 2 + tests/core/test_objects.py | 10 ++- tests/core/test_requirements.py | 124 ++++++++++++++++++++++++++++++++ tests/polydantic/test_lazy.py | 24 ++++++- 30 files changed, 496 insertions(+), 80 deletions(-) create mode 100644 tests/core/test_requirements.py diff --git a/mlem/cli/apply.py b/mlem/cli/apply.py index ab1a0ac4..09a47b13 100644 --- a/mlem/cli/apply.py +++ b/mlem/cli/apply.py @@ -1,4 +1,4 @@ -from typing import Any, Tuple +from typing import Any, Optional, Tuple import click @@ -25,13 +25,19 @@ help="Whether to create links for outputs in .mlem directory.", ) def apply( - model: ModelMeta, output: str, method: str, args: Tuple[Any], link: bool + model: ModelMeta, + output: Optional[str], + method: str, + args: Tuple[Any], + link: bool, ): """Apply a model to supplied data.""" from mlem.api import apply click.echo("applying") - apply(model, *args, method=method, output=output, link=link) + result = apply(model, *args, method=method, output=output, link=link) + if output is None: + click.echo(result) @mlem_command() diff --git a/mlem/contrib/catboost.py b/mlem/contrib/catboost.py index a7b948d2..2e97423a 100644 --- a/mlem/contrib/catboost.py +++ b/mlem/contrib/catboost.py @@ -57,6 +57,9 @@ def _get_model_file_name(self, model): return self.classifier_file_name return self.regressor_file_name + class Config: + use_enum_values = True + class CatBoostModel(ModelType, ModelHook): """ diff --git a/mlem/contrib/dvc.py b/mlem/contrib/dvc.py index 108cba86..a6256f10 100644 --- a/mlem/contrib/dvc.py +++ b/mlem/contrib/dvc.py @@ -1,13 +1,30 @@ import contextlib +import os.path from typing import IO, ClassVar, Iterator, Tuple from urllib.parse import unquote_plus from fsspec import AbstractFileSystem from fsspec.implementations.github import GithubFileSystem +from fsspec.implementations.local import LocalFileSystem from mlem.core.artifacts import LocalArtifact, LocalStorage, Storage from mlem.core.meta_io import get_fs +BATCH_SIZE = 10 ** 5 + + +def find_dvc_repo_root(path: str): + from dvc.exceptions import NotDvcRepoError + + _path = path[:] + while True: + if os.path.isdir(os.path.join(_path, ".dvc")): + return _path + if _path == "/": + break + _path = 
os.path.dirname(_path)
+    raise NotDvcRepoError(f"Path {path} is not in dvc repo")
+
 
 class DVCStorage(LocalStorage):
     """For now this storage is user-managed dvc storage, which means user should
@@ -38,9 +55,13 @@ class DVCArtifact(LocalArtifact):
     uri: str
 
     def download(self, target_path: str) -> LocalArtifact:
-        from dvc.repo import Repo
-
-        Repo.get_url(self.uri, out=target_path)
+        with self.open() as fin, open(
+            os.path.join(target_path, os.path.basename(self.uri)), "wb"
+        ) as fout:
+            batch = fin.read(BATCH_SIZE)
+            while batch:
+                fout.write(batch)
+                batch = fin.read(BATCH_SIZE)
         return LocalArtifact(uri=target_path)
 
     @contextlib.contextmanager
@@ -58,9 +79,17 @@ def open(self) -> Iterator[IO]:
                 mode="rb",
             ) as f:
                 yield f
-        else:
-            with fs.open(path) as f:
-                yield f
+            return
+        elif isinstance(fs, LocalFileSystem):
+            if not os.path.exists(path):
+                root = find_dvc_repo_root(path)
+                # alternative caching impl
+                # Repo(root).pull(os.path.relpath(path, root))
+                with open(os.path.relpath(path, root), mode="rb") as f:
+                    yield f
+                return
+        with fs.open(path) as f:
+            yield f
 
     def relative(self, fs: AbstractFileSystem, path: str) -> "DVCArtifact":
         relative = super().relative(fs, path)
diff --git a/mlem/contrib/lightgbm.py b/mlem/contrib/lightgbm.py
index d8cd65b4..2d8b1861 100644
--- a/mlem/contrib/lightgbm.py
+++ b/mlem/contrib/lightgbm.py
@@ -16,7 +16,9 @@
 from mlem.core.hooks import IsInstanceHookMixin
 from mlem.core.model import ModelHook, ModelIO, ModelType, Signature
 from mlem.core.requirements import (
+    AddRequirementHook,
     InstallableRequirement,
+    Requirement,
     Requirements,
     UnixPackageRequirement,
 )
@@ -131,3 +133,14 @@ def get_requirements(self) -> Requirements:
         + InstallableRequirement.from_module(mod=lgb)
         + LGB_REQUIREMENT
     )
+
+
+class LGBMLibgompHook(AddRequirementHook):
+    to_add = LGB_REQUIREMENT
+
+    @classmethod
+    def is_object_valid(cls, obj: Requirement) -> bool:
+        return (
+            isinstance(obj, InstallableRequirement)
+            and obj.module == "lightgbm"
+        )
diff --git a/mlem/contrib/sklearn.py b/mlem/contrib/sklearn.py
index c4689003..f71688bc 100644
--- a/mlem/contrib/sklearn.py
+++ b/mlem/contrib/sklearn.py
@@ -66,4 +66,6 @@ def get_requirements(self) -> Requirements:
         )  # FIXME: https://github.com/iterative/mlem/issues/34 # optimize methods reqs
         # some sklearn compatible model (either from library or user code) - fallback
-        return super().get_requirements()
+        return super().get_requirements() + InstallableRequirement.from_module(
+            sklearn
+        )
diff --git a/mlem/contrib/xgboost.py b/mlem/contrib/xgboost.py
index d20260cd..ac9f65f5 100644
--- a/mlem/contrib/xgboost.py
+++ b/mlem/contrib/xgboost.py
@@ -12,7 +12,9 @@
 from mlem.core.hooks import IsInstanceHookMixin
 from mlem.core.model import ModelHook, ModelIO, ModelType, Signature
 from mlem.core.requirements import (
+    AddRequirementHook,
     InstallableRequirement,
+    Requirement,
     Requirements,
     UnixPackageRequirement,
     WithRequirements,
@@ -172,3 +174,13 @@ def get_requirements(self) -> Requirements:
         + InstallableRequirement.from_module(xgboost)
         + XGB_REQUIREMENT
     )
+
+
+class XGBLibgompHook(AddRequirementHook):
+    to_add = XGB_REQUIREMENT
+
+    @classmethod
+    def is_object_valid(cls, obj: Requirement) -> bool:
+        return (
+            isinstance(obj, InstallableRequirement) and obj.module == "xgboost"
+        )
diff --git a/mlem/core/artifacts.py b/mlem/core/artifacts.py
index a2dc24fa..d1c3801b 100644
--- a/mlem/core/artifacts.py
+++ b/mlem/core/artifacts.py
@@ -98,9 +98,10 @@ class FSSpecStorage(Storage):
     storage_options: Optional[Dict[str, str]] = {}
 
     def upload(self, local_path: str, target_path: str) -> FSSpecArtifact:
-        self.get_fs().upload(
-            local_path, os.path.join(self.base_path, target_path)
-        )
+        fs = self.get_fs()
+        path = os.path.join(self.base_path, target_path)
+        fs.makedirs(os.path.dirname(path), exist_ok=True)
+        fs.upload(local_path, path)
         return FSSpecArtifact(uri=self.create_uri(target_path))
 
     @contextlib.contextmanager
@@ -146,6 +147,10 @@ class LocalStorage(FSSpecStorage):
     type: ClassVar = "local"
     fs = LocalFileSystem()
 
+    @property
+    def base_path(self):
+        return self.uri
+
     def relative(self, fs: AbstractFileSystem, path: str) -> "Storage":
         if isinstance(fs, LocalFileSystem):
             return LocalStorage(uri=self.create_uri(path))
@@ -158,11 +163,12 @@ def relative(self, fs: AbstractFileSystem, path: str) -> "Storage":
         return storage
 
     def upload(self, local_path: str, target_path: str) -> "LocalArtifact":
-        return LocalArtifact(uri=super().upload(local_path, target_path).uri)
+        super().upload(local_path, target_path)
+        return LocalArtifact(uri=target_path)
 
     @contextlib.contextmanager
     def open(self, path) -> Iterator[Tuple[IO, "LocalArtifact"]]:
-        with super().open(os.path.join(self.uri, path)) as (io, _):
+        with super().open(path) as (io, _):
             yield io, LocalArtifact(uri=path)
 
diff --git a/mlem/core/errors.py b/mlem/core/errors.py
index 4b983c0e..b9771108 100644
--- a/mlem/core/errors.py
+++ b/mlem/core/errors.py
@@ -46,3 +46,11 @@ class MlemObjectNotSavedError(ValueError, MlemError):
 
 class ObjectExistsError(ValueError, MlemError):
     """Thrown if we attempt to write object, but something already exists in the given path"""
+
+
+class HookNotFound(MlemError):
+    """Thrown if no suitable hook is found for an object"""
+
+
+class MultipleHooksFound(MlemError):
+    """Thrown if more than one suitable hook is found for an object"""
diff --git a/mlem/core/hooks.py b/mlem/core/hooks.py
index 542df77d..9f837f5d 100644
--- a/mlem/core/hooks.py
+++ b/mlem/core/hooks.py
@@ -9,6 +9,8 @@
 from abc import ABC, abstractmethod
 from typing import Any, ClassVar, Generic, List, Tuple, Type, TypeVar
 
+from mlem.core.errors import HookNotFound, MultipleHooksFound
+
 logger = logging.getLogger(__name__)
 
 ANALYZER_FIELD = "analyzer"
@@ -40,7 +42,7 @@ def is_object_valid(cls, obj: Any) -> bool:
         :param obj: object to analyze
         :return: True or False
         """
-        raise NotImplementedError()
+        raise NotImplementedError
 
     @classmethod
     @abstractmethod
@@ -52,7 +54,7 @@ def process(cls, obj: Any, **kwargs) -> T:
         :param kwargs: additional information to be used for analysis
         :return: analysis result
         """
-        raise NotImplementedError()
+        raise NotImplementedError
 
     def __init_subclass__(cls, *args, **kwargs):
         if not inspect.isabstract(cls):
@@ -186,10 +188,10 @@ def _find_hook(cls, obj) -> Type[Hook[T]]:
         if len(lp_hooks) == 1:
             return lp_hooks[0]
         if len(lp_hooks) > 1:
-            raise ValueError(
+            raise MultipleHooksFound(
                 f"Multiple suitable hooks for object {obj} ({lp_hooks})"
             )
-        raise ValueError(
+        raise HookNotFound(
             f"No suitable {cls.base_hook_class.__name__} for object of type "
             f'"{type(obj).__name__}". 
Registered hooks: {cls.hooks}' ) diff --git a/mlem/core/meta_io.py b/mlem/core/meta_io.py index 2395dfc1..96a7f800 100644 --- a/mlem/core/meta_io.py +++ b/mlem/core/meta_io.py @@ -64,7 +64,7 @@ def get_path_by_fs_path(fs: AbstractFileSystem, path: str): Another alternative is to support this on fsspec level, but we need to contribute it ourselves""" if isinstance(fs, GithubFileSystem): # here "rev" should be already url encoded - return f"{fs.protocol}://{fs.org}:{fs.repo}@{fs.root}/{path}" + return f"https://github.com/{fs.org}/{fs.repo}/tree/{fs.root}/{path}" protocol = fs.protocol if isinstance(protocol, (list, tuple)): if any(path.startswith(p) for p in protocol): diff --git a/mlem/core/metadata.py b/mlem/core/metadata.py index 1c38ab46..7884a463 100644 --- a/mlem/core/metadata.py +++ b/mlem/core/metadata.py @@ -10,6 +10,7 @@ from typing_extensions import Literal from yaml import safe_load +from mlem.core.errors import HookNotFound from mlem.core.meta_io import get_fs, get_meta_path, get_path_by_repo_path_rev from mlem.core.objects import DatasetMeta, MlemMeta, ModelMeta, find_object from mlem.utils.root import find_mlem_root @@ -19,7 +20,7 @@ def get_object_metadata(obj: Any, tmp_sample_data=None) -> MlemMeta: """Convert given object to appropriate MlemMeta subclass""" try: return DatasetMeta.from_data(obj) - except ValueError: # TODO need separate analysis exception + except HookNotFound: return ModelMeta.from_obj(obj, sample_data=tmp_sample_data) diff --git a/mlem/core/model.py b/mlem/core/model.py index cdfc5b41..88101c4d 100644 --- a/mlem/core/model.py +++ b/mlem/core/model.py @@ -18,6 +18,7 @@ ) from mlem.core.hooks import Analyzer, Hook from mlem.core.requirements import Requirements, WithRequirements +from mlem.utils.module import get_object_requirements class ModelIO(MlemObject): @@ -255,7 +256,7 @@ def get_requirements(self) -> Requirements: for m in self.methods.values() for r in m.get_requirements().__root__ ] - ) + ) + get_object_requirements(self.model) class ModelHook(Hook[ModelType], ABC): @@ -264,7 +265,7 @@ class ModelHook(Hook[ModelType], ABC): def process( # pylint: disable=arguments-differ # so what cls, obj: Any, sample_data: Optional[Any] = None, **kwargs ) -> ModelType: - pass + raise NotImplementedError class ModelAnalyzer(Analyzer[ModelType]): diff --git a/mlem/core/objects.py b/mlem/core/objects.py index df1b3f9f..4255ee28 100644 --- a/mlem/core/objects.py +++ b/mlem/core/objects.py @@ -407,7 +407,9 @@ class ModelMeta(_ExternalMeta): def from_obj(cls, model: Any, sample_data: Any = None) -> "ModelMeta": mt = ModelAnalyzer.analyze(model, sample_data=sample_data) mt.model = model - return ModelMeta(model_type=mt, requirements=mt.get_requirements()) + return ModelMeta( + model_type=mt, requirements=mt.get_requirements().expanded + ) def write_value(self, mlem_root: str) -> Artifacts: if self.model_type.model is not None: @@ -417,7 +419,7 @@ def write_value(self, mlem_root: str) -> Artifacts: self.model_type.model, ) else: - raise NotImplementedError() # TODO: https://github.com/iterative/mlem/issues/37 + raise NotImplementedError # TODO: https://github.com/iterative/mlem/issues/37 # self.get_artifacts().materialize(path) return artifacts @@ -438,7 +440,15 @@ def __getattr__(self, item): class DatasetMeta(_ExternalMeta): __transient_fields__ = {"dataset"} object_type: ClassVar = "dataset" - reader: Optional[DatasetReader] = None + reader_cache: Optional[Dict] + reader: Optional[DatasetReader] + reader, reader_raw, reader_cache = lazy_field( + DatasetReader, + 
"reader", + "reader_cache", + parse_as_type=Optional[DatasetReader], + default=None, + ) dataset: ClassVar[Optional[Dataset]] = None @property @@ -451,7 +461,7 @@ def from_data(cls, data: Any) -> "DatasetMeta": data, ) meta = DatasetMeta( - requirements=dataset.dataset_type.get_requirements() + requirements=dataset.dataset_type.get_requirements().expanded ) meta.dataset = dataset return meta diff --git a/mlem/core/requirements.py b/mlem/core/requirements.py index 206fa017..7dbd9ce8 100644 --- a/mlem/core/requirements.py +++ b/mlem/core/requirements.py @@ -7,7 +7,7 @@ import json import os import zlib -from abc import ABC +from abc import ABC, abstractmethod from pathlib import Path from types import ModuleType from typing import ( @@ -26,6 +26,9 @@ from mlem.core.base import MlemObject # I dont know how to do this better +from mlem.core.errors import HookNotFound +from mlem.core.hooks import Analyzer, Hook + MODULE_PACKAGE_MAPPING = { "sklearn": "scikit-learn", "skimage": "scikit-image", @@ -132,7 +135,6 @@ class CustomRequirement(PythonRequirement): """ type: ClassVar[str] = "custom" - name: str source64zip: str is_package: bool @@ -162,7 +164,10 @@ def from_module(mod: ModuleType) -> "CustomRequirement": Path(mod.__file__).read_text(encoding="utf8") ) return CustomRequirement( - name=mod.__name__, source64zip=src, is_package=is_package + module=mod.__name__.split(".")[0], + name=mod.__name__, + source64zip=src, + is_package=is_package, ) @staticmethod @@ -312,6 +317,10 @@ def modules(self) -> List[str]: """ return [r.module for r in self.of_type(PythonRequirement)] + @property + def expanded(self) -> "Requirements": + return expand_requirements(self) + def _add_installable(self, requirement: InstallableRequirement): for req in self.installable: if req.package == requirement.package: @@ -407,35 +416,40 @@ def resolve_requirements(other: "AnyRequirements") -> Requirements: :param other: requirement in supported format :return: :class:`Requirements` instance """ - if not isinstance(other, Requirements): - if isinstance(other, list): - if len(other) == 0: - return Requirements.new() - if isinstance(other[0], str): - other = Requirements( - __root__=[ - InstallableRequirement.from_str(r) for r in other - ] - ) - elif isinstance(other[0], Requirement): - other = Requirements(__root__=other) - else: - raise TypeError( - "only other Requirements, Requirement, list of Requirement objects, string " - "(or list of strings) can be added" - ) - elif isinstance(other, Requirement): - other = Requirements(__root__=[other]) - elif isinstance(other, str): - other = Requirements( - __root__=[InstallableRequirement.from_str(other)] + if isinstance(other, Requirements): + return other + + if isinstance(other, list): + if len(other) == 0: + return Requirements.new() + + if isinstance(other[0], str): + return Requirements( + __root__=[ + InstallableRequirement.from_str(r) for r in set(other) + ] ) - else: - raise TypeError( - "only other Requirements, Requirement, list of Requirement objects, string " - "(or list of strings) can be added" - ) - return other + + if isinstance(other[0], Requirement): + res = Requirements.new() + for r in other: + res.add(r) + return res + + raise TypeError( + "only other Requirements, Requirement, list of Requirement objects, string " + "(or list of strings) can be added" + ) + if isinstance(other, Requirement): + return Requirements(__root__=[other]) + + if isinstance(other, str): + return Requirements(__root__=[InstallableRequirement.from_str(other)]) + + raise TypeError( + 
"only other Requirements, Requirement, list of Requirement objects, string " + "(or list of strings) can be added" + ) AnyRequirements = Union[ @@ -464,6 +478,40 @@ def get_requirements(self) -> Requirements: ) +class RequirementsHook(Hook[Requirements], ABC): + @classmethod + @abstractmethod + def is_object_valid(cls, obj: Requirement) -> bool: + raise NotImplementedError + + @classmethod + @abstractmethod + def process(cls, obj: Requirement, **kwargs) -> Requirements: + raise NotImplementedError + + +class AddRequirementHook(RequirementsHook, ABC): + to_add: AnyRequirements = [] + + @classmethod + def process(cls, obj: Requirement, **kwargs) -> Requirements: + return resolve_requirements(cls.to_add) + obj + + +class RequirementsAnalyzer(Analyzer[Requirements]): + base_hook_class = RequirementsHook + + +def expand_requirements(requirements: Requirements) -> Requirements: + res = Requirements.new() + for req in requirements.__root__: + try: + res += RequirementsAnalyzer.analyze(req) + except HookNotFound: + res += req + return res + + # Copyright 2019 Zyfra # Copyright 2021 Iterative # diff --git a/mlem/polydantic/core.py b/mlem/polydantic/core.py index e744901c..ff696c5c 100644 --- a/mlem/polydantic/core.py +++ b/mlem/polydantic/core.py @@ -3,8 +3,10 @@ from pydantic import BaseModel +from mlem.polydantic.lazy import LazyModel -class PolyModel(BaseModel): + +class PolyModel(LazyModel): """Base class that enable polymorphism in pydantic models Attributes: @@ -53,7 +55,7 @@ def dict(self, **kwargs): result[self.__type_field__] = alias return result - @wraps(BaseModel._calculate_keys) + @wraps(BaseModel._calculate_keys) # pylint: disable=protected-access def _calculate_keys(self, *args, **kwargs) -> Optional[AbstractSet[str]]: """Exclude transient stuff""" kwargs["exclude"] = ( diff --git a/mlem/polydantic/lazy.py b/mlem/polydantic/lazy.py index 89b207f3..e4e06a8d 100644 --- a/mlem/polydantic/lazy.py +++ b/mlem/polydantic/lazy.py @@ -1,28 +1,53 @@ -from typing import Type +from typing import Any, Type -from pydantic import Field, parse_obj_as +from pydantic import BaseModel, Field, parse_obj_as from pydantic.fields import FieldInfo +class LazyModel(BaseModel): + def __setattr__(self, name, value): + """ + To be able to use properties with setters + """ + prop = getattr(self.__class__, name, None) + if ( + prop is not None + and isinstance(prop, property) + and prop.fset is not None + ): + object.__setattr__(self, name, value) + return + super().__setattr__(name, value) + + def lazy_field( - type_: Type, alias: str, alias_cache: str, *args, **field_kwargs + type_: Type, + alias: str, + alias_cache: str, + *args, + parse_as_type: Any = None, + **field_kwargs ): - field_info: FieldInfo = Field(*args, alias=alias, **field_kwargs) - @property def getter(self): value = getattr(self, alias_cache) if not isinstance(value, type_): - value = parse_obj_as(type_, value) + value = parse_obj_as(parse_as_type or type_, value) setattr(self, alias_cache, value) return value - @property + def setter(self, value): + setattr(self, alias_cache, value) + def getter_raw(self): value = getattr(self, alias_cache) if isinstance(value, type_): value = value.dict() return value - return getter, getter_raw, field_info + return ( + property(fget=getter, fset=setter), + property(fget=getter_raw, fset=setter), + field_info, + ) diff --git a/mlem/utils/module.py b/mlem/utils/module.py index 9b9f4bbb..09e6b971 100644 --- a/mlem/utils/module.py +++ b/mlem/utils/module.py @@ -160,7 +160,8 @@ def __init__(self): [ "opcode", 
"nturl2path", # pytest requirements missed by isort - "pkg_resources", # EBNT-112: workaround for imports from setup.py (see build/builder/docker.py) + "pkg_resources", # workaround for imports from setup.py (see build/builder/docker.py) + "sre_compile", "posixpath", "setuptools", "pydevconsole", @@ -507,6 +508,7 @@ def _should_ignore(self, mod: ModuleType): any(mod.__name__.startswith(i) for i in self.ignoring) or is_private_module(mod) or is_pseudo_module(mod) + or is_builtin_module(mod) ) def add_requirement(self, obj_or_module): @@ -565,7 +567,10 @@ def get_object_requirements(obj) -> Requirements: :param obj: obj to analyze :return: :class:`.Requirements` object containing all required packages """ - a = RequirementAnalyzer(recurse=True) + # there was recurse=True here for some reason, but this caused infinite recursion in some cases + # (def a(): b(); def b(): a(); def func(): a(); get_obj_reqs(func) - smth like that) + # I removed it and none of the tests failed ¯\_(ツ)_/¯ + a = RequirementAnalyzer() a.dump(obj) return a.to_requirements() diff --git a/setup.py b/setup.py index 1662327c..b1b19d3f 100644 --- a/setup.py +++ b/setup.py @@ -66,6 +66,7 @@ def run(self): "appdirs", "python-daemon", "distro", + "gitpython", ] # storage diff --git a/tests/cli/test_apply.py b/tests/cli/test_apply.py index 7cd2d71e..d992b014 100644 --- a/tests/cli/test_apply.py +++ b/tests/cli/test_apply.py @@ -21,6 +21,18 @@ def test_apply(model_path, data_path): assert isinstance(predictions, ndarray) +def test_apply_no_output(model_path, data_path): + runner = CliRunner() + result = runner.invoke( + apply, + [model_path, data_path, "-m", "predict", "--no-link"], + ) + assert result.exit_code == 0, (result.output, result.exception) + applying = "applying\n" + assert result.output.startswith(applying) + assert len(result.output) > len(applying) + + def test_apply_for_multiple_datasets(model_path, data_path): runner = CliRunner() result = runner.invoke( diff --git a/tests/conftest.py b/tests/conftest.py index a19e7479..e1996bdc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -230,13 +230,17 @@ def dataset_write_read_check( def check_model_type_common_interface( - model_type: ModelType, data_type: DatasetType, returns_type: DatasetType + model_type: ModelType, + data_type: DatasetType, + returns_type: DatasetType, + **kwargs, ): assert PREDICT_METHOD_NAME in model_type.methods assert model_type.methods[PREDICT_METHOD_NAME] == Signature( name=PREDICT_METHOD_NAME, args=[Argument(name=PREDICT_ARG_NAME, type_=data_type)], returns=returns_type, + **kwargs, ) diff --git a/tests/contrib/test_catboost.py b/tests/contrib/test_catboost.py index e79048d7..b785856e 100644 --- a/tests/contrib/test_catboost.py +++ b/tests/contrib/test_catboost.py @@ -47,7 +47,7 @@ def test_catboost_model(catboost_model_fixture, pandas_data, tmpdir, request): ), ) - expected_requirements = {"catboost", "pandas", "numpy"} + expected_requirements = {"catboost", "pandas", "numpy", "scipy"} assert set(cbmw.get_requirements().modules) == expected_requirements assert cbmw.model is catboost_model diff --git a/tests/contrib/test_lightgbm.py b/tests/contrib/test_lightgbm.py index 3b3d2bc3..05b7d17b 100644 --- a/tests/contrib/test_lightgbm.py +++ b/tests/contrib/test_lightgbm.py @@ -147,7 +147,8 @@ def test_model__predict_not_dataset(model): def test_model__dump_load(tmpdir, model, dataset_np, local_fs): - expected_requirements = {"lightgbm", "numpy"} + # pandas is not required, but if it is installed, it is imported by lightgbm + 
expected_requirements = {"lightgbm", "numpy", "scipy", "pandas"} assert set(model.get_requirements().modules) == expected_requirements artifacts = model.dump( diff --git a/tests/contrib/test_sklearn.py b/tests/contrib/test_sklearn.py index 346a15ee..72845229 100644 --- a/tests/contrib/test_sklearn.py +++ b/tests/contrib/test_sklearn.py @@ -1,3 +1,4 @@ +import lightgbm as lgb import numpy as np import pytest from sklearn.linear_model import LinearRegression, LogisticRegression @@ -7,6 +8,7 @@ from mlem.core.artifacts import LOCAL_STORAGE from mlem.core.dataset_type import DatasetAnalyzer from mlem.core.model import ModelAnalyzer +from mlem.core.requirements import UnixPackageRequirement from tests.conftest import check_model_type_common_interface @@ -34,6 +36,12 @@ def regressor(inp_data, out_data): return lr +@pytest.fixture +def lgbm_model(inp_data, out_data): + lgbm_regressor = lgb.LGBMRegressor() + return lgbm_regressor.fit(inp_data, out_data) + + @pytest.mark.parametrize("model_fixture", ["classifier", "regressor"]) def test_hook(model_fixture, inp_data, request): model = request.getfixturevalue(model_fixture) @@ -51,6 +59,22 @@ def test_hook(model_fixture, inp_data, request): ) +def test_hook_lgb(lgbm_model, inp_data): + data_type = DatasetAnalyzer.analyze(inp_data) + model_type = ModelAnalyzer.analyze(lgbm_model, sample_data=inp_data) + + assert isinstance(model_type, SklearnModel) + check_model_type_common_interface( + model_type, + data_type, + NumpyNdarrayType( + shape=(None,), + dtype="float64", + ), + varkw="kwargs", + ) + + @pytest.mark.parametrize("model", ["classifier", "regressor"]) def test_model_type__predict(model, inp_data, request): model = request.getfixturevalue(model) @@ -97,6 +121,33 @@ def test_model_type__dump_load(tmpdir, model, inp_data, request): assert set(model_type.get_requirements().modules) == expected_requirements +def test_model_type_lgb__dump_load(tmpdir, lgbm_model, inp_data): + model_type = ModelAnalyzer.analyze(lgbm_model, sample_data=inp_data) + + expected_requirements = {"sklearn", "lightgbm", "pandas", "numpy", "scipy"} + reqs = model_type.get_requirements().expanded + assert set(reqs.modules) == expected_requirements + assert reqs.of_type(UnixPackageRequirement) == [ + UnixPackageRequirement(package_name="libgomp1") + ] + artifacts = model_type.dump(LOCAL_STORAGE, tmpdir) + model_type.model = None + + with pytest.raises(ValueError): + model_type.call_method("predict", inp_data) + + model_type.load(artifacts) + np.testing.assert_array_almost_equal( + lgbm_model.predict(inp_data), + model_type.call_method("predict", inp_data), + ) + reqs = model_type.get_requirements().expanded + assert set(reqs.modules) == expected_requirements + assert reqs.of_type(UnixPackageRequirement) == [ + UnixPackageRequirement(package_name="libgomp1") + ] + + # Copyright 2019 Zyfra # Copyright 2021 Iterative # diff --git a/tests/contrib/test_xgboost.py b/tests/contrib/test_xgboost.py index a605d828..c7f3958d 100644 --- a/tests/contrib/test_xgboost.py +++ b/tests/contrib/test_xgboost.py @@ -131,7 +131,8 @@ def test_model__predict_not_dmatrix(model): def test_model__dump_load(tmpdir, model, dmatrix_np, local_fs): - expected_requirements = {"xgboost", "numpy"} + # pandas is not required, but it is conditionally imported by some Booster methods + expected_requirements = {"xgboost", "numpy", "scipy", "pandas"} assert set(model.get_requirements().modules) == expected_requirements artifacts = model.dump( diff --git a/tests/core/test_artifacts.py b/tests/core/test_artifacts.py 
index 6a55d269..f3de08c3 100644 --- a/tests/core/test_artifacts.py +++ b/tests/core/test_artifacts.py @@ -4,7 +4,12 @@ from fsspec.implementations.local import LocalFileSystem from s3fs import S3FileSystem -from mlem.core.artifacts import FSSpecArtifact, FSSpecStorage, LocalStorage +from mlem.core.artifacts import ( + FSSpecArtifact, + FSSpecStorage, + LocalArtifact, + LocalStorage, +) from tests.conftest import long, resource_path @@ -53,3 +58,18 @@ def test_relative_storage_local(): assert rel1 != local_storage assert isinstance(rel1, FSSpecStorage) assert rel1.uri == "s3://some_path" + + +def test_local_storage_relative(tmpdir): + storage = LocalStorage(uri=str(tmpdir)) + rstorage = storage.relative(LocalFileSystem(), "subdir") + with rstorage.open("file2") as (f, open_art): + f.write(b"1") + assert isinstance(open_art, LocalArtifact) + assert open_art.uri == "file2" + assert os.path.isfile(os.path.join(tmpdir, "subdir", open_art.uri)) + + upload_art = rstorage.upload(__file__, "file") + assert isinstance(upload_art, LocalArtifact) + assert upload_art.uri == "file" + assert os.path.isfile(os.path.join(tmpdir, "subdir", upload_art.uri)) diff --git a/tests/core/test_meta_io.py b/tests/core/test_meta_io.py index 26c44613..25ea0a2f 100644 --- a/tests/core/test_meta_io.py +++ b/tests/core/test_meta_io.py @@ -1,4 +1,5 @@ import os.path +from json import loads import pytest from fsspec.implementations.github import GithubFileSystem @@ -118,7 +119,7 @@ def test_get_path_by_fs_path_github(): ) uri = get_path_by_fs_path(fs, "path") fs2, path = get_fs(uri) - assert fs2.to_json() == fs.to_json() + assert loads(fs2.to_json()) == loads(fs.to_json()) assert path == "path" diff --git a/tests/core/test_metadata.py b/tests/core/test_metadata.py index c63fb814..d2ea05fe 100644 --- a/tests/core/test_metadata.py +++ b/tests/core/test_metadata.py @@ -128,6 +128,7 @@ def test_load_link_with_fsspec_path(current_test_branch): model.predict(train) +@pytest.mark.xfail # TODO: https://github.com/iterative/mlem/issues/110 @long def test_saving_to_s3(model, s3_storage_fs, s3_tmp_path): path = s3_tmp_path("model_save") @@ -143,6 +144,7 @@ def test_saving_to_s3(model, s3_storage_fs, s3_tmp_path): assert s3_storage_fs.isfile(str(model_path / ART_DIR / "data.pkl")) +@pytest.mark.xfail # TODO: https://github.com/iterative/mlem/issues/110 @long def test_loading_from_s3(model, s3_storage_fs, s3_tmp_path): path = s3_tmp_path("model_load") diff --git a/tests/core/test_objects.py b/tests/core/test_objects.py index 086ae1d4..ba6db8fc 100644 --- a/tests/core/test_objects.py +++ b/tests/core/test_objects.py @@ -18,17 +18,23 @@ from mlem.core.objects import MlemLink, ModelMeta, mlem_dir_path from tests.conftest import MLEM_TEST_REPO, long, need_test_repo_auth +MODEL_NAME = "decision_tree" + def test_model_dump(mlem_root): X, y = load_iris(return_X_y=True) clf = DecisionTreeClassifier().fit(X, y) meta = ModelMeta.from_obj(clf, sample_data=X) - dir = os.path.join(mlem_root, "decision_tree") + dir = os.path.join(mlem_root, MODEL_NAME) meta.dump(dir, link=True) link_path = os.path.join( - mlem_root, MLEM_DIR, "model", "decision_tree" + MLEM_EXT + mlem_root, MLEM_DIR, "model", MODEL_NAME + MLEM_EXT ) assert os.path.exists(link_path) + assert os.path.exists(os.path.join(dir, META_FILE_NAME)) + assert os.path.exists( + os.path.join(mlem_root, MODEL_NAME, ART_DIR, "data.pkl") + ) model = load(link_path, follow_links=True) model.predict(X) diff --git a/tests/core/test_requirements.py b/tests/core/test_requirements.py new file mode 100644 
index 00000000..fe58d535 --- /dev/null +++ b/tests/core/test_requirements.py @@ -0,0 +1,124 @@ +import pytest + +from mlem.core.requirements import ( + CustomRequirement, + InstallableRequirement, + Requirements, + resolve_requirements, +) + + +def test_resolve_requirements_arg(): + requirements = Requirements.new( + [ + InstallableRequirement(module="dumb", version="0.4.1"), + InstallableRequirement(module="art", version="4.0"), + ] + ) + actual_reqs = resolve_requirements(requirements) + assert actual_reqs == requirements + + +def test_resolve_requirement_arg(): + req = InstallableRequirement(module="dumb", version="0.4.1") + actual_reqs = resolve_requirements(req) + assert actual_reqs.installable[0] == req + + +def test_resolve_requirement_list_arg(): + req = [ + InstallableRequirement(module="dumb", version="0.4.1"), + InstallableRequirement(module="art", version="4.0"), + ] + actual_reqs = resolve_requirements(req) + assert len(actual_reqs.installable) == 2 + assert actual_reqs.installable == req + + +def test_resolve_str_arg(): + req = "dumb==0.4.1" + actual_reqs = resolve_requirements(req) + assert actual_reqs.installable[0].to_str() == req + + +def test_resolve_str_list_arg(): + req = ["dumb==0.4.1", "art==4.0"] + actual_reqs = resolve_requirements(req) + assert len(actual_reqs.installable) == 2 + assert sorted(req) == sorted([r.to_str() for r in actual_reqs.installable]) + + +def test_installable_requirement__from_module(): + import pandas as pd + + assert ( + InstallableRequirement.from_module(pd).to_str() + == f"pandas=={pd.__version__}" + ) + + import numpy as np + + assert ( + InstallableRequirement.from_module(np).to_str() + == f"numpy=={np.__version__}" + ) + + import sklearn as sk + + assert ( + InstallableRequirement.from_module(sk).to_str() + == f"scikit-learn=={sk.__version__}" + ) + assert ( + InstallableRequirement.from_module(sk, "xyz").to_str() + == f"xyz=={sk.__version__}" + ) + + +def test_custom_requirement__source(): + from mlem import core + + package = CustomRequirement.from_module(core) + assert package.is_package + assert package.sources is not None + with pytest.raises(AttributeError): + package.source # pylint: disable=pointless-statement + + module = CustomRequirement.from_module(core.requirements) + assert not module.is_package + assert module.source is not None + with pytest.raises(AttributeError): + module.sources # pylint: disable=pointless-statement + + +def test_resolve_unique_str(): + reqs_str = ["a==1", "a==1"] + reqs = Requirements.new(reqs_str) + assert len(reqs.__root__) == 1 + assert reqs.installable[0] == InstallableRequirement( + module="a", version="1" + ) + + +def test_resolve_unique_req(): + req = InstallableRequirement(module="a", version="1") + reqs_list = [req, req] + reqs = Requirements.new(reqs_list) + assert len(reqs.__root__) == 1 + assert reqs.installable[0] == req + + +# Copyright 2019 Zyfra +# Copyright 2021 Iterative +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/tests/polydantic/test_lazy.py b/tests/polydantic/test_lazy.py index c0a5446f..ac4d3998 100644 --- a/tests/polydantic/test_lazy.py +++ b/tests/polydantic/test_lazy.py @@ -1,9 +1,9 @@ -from typing import Any +from typing import Any, Dict, Optional import pytest from pydantic import BaseModel, ValidationError, parse_obj_as, validator -from mlem.polydantic.lazy import lazy_field +from mlem.polydantic.lazy import LazyModel, lazy_field class Payload(BaseModel): @@ -60,3 +60,23 @@ def test_setting_value(): obj.field.value = 2 assert obj.field.value == 2 assert obj.field_raw["value"] == 2 + + +class ModelWithOptional(LazyModel): + field_cache: Optional[Dict] + field, field_raw, field_cache = lazy_field( + Payload, + "field", + "field_cache", + parse_as_type=Optional[Payload], + default=None, + ) + + +def test_setting_optional_field(): + obj = ModelWithOptional() + assert obj.field is None + obj.field = Payload(value=0) + assert obj.field.value == 1 + obj.field_raw = {"value": 5} + assert obj.field.value == 6
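
A minimal sketch (not part of the patch itself) of how the requirement-expansion hooks added in mlem/core/requirements.py are meant to compose; MyLibDepsHook, mylib and libfoo1 are hypothetical stand-ins mirroring LGBMLibgompHook from mlem/contrib/lightgbm.py:

from mlem.core.requirements import (
    AddRequirementHook,
    InstallableRequirement,
    Requirement,
    Requirements,
    UnixPackageRequirement,
)


class MyLibDepsHook(AddRequirementHook):
    # hypothetical hook: whenever "mylib" appears as an installable
    # requirement, also require the libfoo1 system package
    to_add = UnixPackageRequirement(package_name="libfoo1")

    @classmethod
    def is_object_valid(cls, obj: Requirement) -> bool:
        return (
            isinstance(obj, InstallableRequirement) and obj.module == "mylib"
        )


# subclassing registers the hook with RequirementsAnalyzer, so the new
# Requirements.expanded property (which calls expand_requirements) picks it up
reqs = Requirements.new(["mylib==1.0", "numpy==1.21.0"])
expanded = reqs.expanded
assert expanded.of_type(UnixPackageRequirement) == [
    UnixPackageRequirement(package_name="libfoo1")
]
assert set(expanded.modules) == {"mylib", "numpy"}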