Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow for logging to Studio when not inside a repo #646

Merged
merged 46 commits into from
Feb 15, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
2160ae4
post to studio even without git/dvc repo
Jul 31, 2023
8653732
tests for no-git scenario
Jul 31, 2023
ca57b17
studio: make no-repo paths relative to cwd
Aug 4, 2023
98ae256
make ruff happy
Aug 4, 2023
df1a20b
don't require exp name
Aug 4, 2023
9ddcf07
don't require baseline rev
Aug 4, 2023
bf01031
Merge branch 'main' into no-git
Aug 4, 2023
3c68cb6
refactor studio path formatting
Aug 4, 2023
f6a0a29
live: Set new defaults `report=None` and `save_dvc_exp=True`.
daavoo Aug 17, 2023
aa3610a
frameworks: Drop model_file.
daavoo Aug 22, 2023
2e538ce
update examples
daavoo Aug 22, 2023
0d21bb6
Merge branch 'main' into 484-30-release
daavoo Aug 30, 2023
bb34d87
fix merge conflicts
Sep 6, 2023
00ac887
Write to root dvc.yaml (#687)
Sep 7, 2023
5dcc43b
report: Drop "auto" logic.
daavoo Sep 7, 2023
7d2528e
studio: Extract `post_to_studio` and decouple from `make_report` (#705)
daavoo Sep 7, 2023
a02d160
refactor(tests): Split `test_main` into separate files.
daavoo Sep 7, 2023
6ccc959
fix matplotlib warning
daavoo Sep 7, 2023
a539c8e
Merge branch 'main' into 484-30-release
Sep 11, 2023
7a374c4
merge 3.0 changes
Sep 11, 2023
c685bc1
Merge remote-tracking branch 'origin/484-30-release' into no-git
Sep 11, 2023
f3ebcd0
fix studio tests
Sep 11, 2023
68b4f90
fix windows studio paths
Sep 11, 2023
8d2112f
fix windows studio paths for plots
Sep 11, 2023
937bc5b
skip fabric tests if not installed
Dec 7, 2023
524e2a6
fix conflicts with main
Dec 8, 2023
0c49bea
drop dvc repo
Dec 8, 2023
83bb14a
drop dvcignore
Dec 8, 2023
71698f0
drop unrelated test_fabric.py file
Dec 8, 2023
a9b028f
fix windows paths
Dec 8, 2023
bb2a3b5
fix windows paths
Dec 8, 2023
35c34a9
merge main
Jan 22, 2024
28cde80
adapt plot paths even if no dvc repo
Jan 25, 2024
12755f2
default baseline rev to all zeros
Jan 25, 2024
b9ba6f2
Merge branch 'main' into no-git
Jan 26, 2024
939ff0b
Merge branch 'main' into no-git
Feb 6, 2024
ade1b3d
consolidate repro tests
Feb 7, 2024
56acdae
set null sha as variable
Feb 7, 2024
33288cd
add type hints to studio
Feb 13, 2024
73357b2
limit windows path handling to studio
Feb 13, 2024
b5ff21d
merge
Feb 13, 2024
53e77a6
fix typing errors in studio module
Feb 13, 2024
bea5a83
fix mypy in live module
Feb 13, 2024
bd431da
drop checking for dvc_file
Feb 13, 2024
5003104
Merge branch 'main' into no-git
Feb 13, 2024
a4cae82
Merge branch 'main' into no-git
Feb 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/dvclive/dvc.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,12 @@ def mark_dvclive_only_ended():


def get_random_exp_name(scm, baseline_rev):
    """Return a randomly generated experiment name.

    Args:
        scm: SCM object forwarded to DVC's ``get_random_exp_name``; may be
            falsy (e.g. ``None``) when no repository is available.
        baseline_rev: Baseline revision forwarded to DVC's
            ``get_random_exp_name``; may be falsy when unknown.

    Returns:
        The result of DVC's ``get_random_exp_name(scm, baseline_rev)`` when
        both arguments are truthy, otherwise the result of
        ``gen_random_name()``.
    """
    from dvc.repo.experiments.utils import gen_random_name
    from dvc.repo.experiments.utils import (
        get_random_exp_name as dvc_get_random_exp_name,
    )

    if scm and baseline_rev:
        return dvc_get_random_exp_name(scm, baseline_rev)
    # Without both an SCM and a baseline revision, fall back to a plain
    # random name.
    # TODO: ping studio for list of existing names to check against
    return gen_random_name()
33 changes: 10 additions & 23 deletions src/dvclive/live.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,17 +126,21 @@ def _init_cleanup(self):
def _init_dvc(self):
from dvc.scm import NoSCM

if os.getenv(env.DVC_EXP_BASELINE_REV, None):
self._dvc_repo = get_dvc_repo()

self._exp_name = os.getenv(env.DVC_EXP_NAME)
self._baseline_rev = os.getenv(env.DVC_EXP_BASELINE_REV)
if not self._exp_name:
scm = self._dvc_repo.scm if self._dvc_repo else None
self._exp_name = get_random_exp_name(scm, self._baseline_rev)

if self._dvc_repo and self._baseline_rev and self._exp_name:
# `dvc exp` execution
self._baseline_rev = os.getenv(env.DVC_EXP_BASELINE_REV, "")
self._exp_name = os.getenv(env.DVC_EXP_NAME, "")
self._inside_dvc_exp = True
if self._save_dvc_exp:
logger.info("Ignoring `save_dvc_exp` because `dvc exp run` is running")
self._save_dvc_exp = False

self._dvc_repo = get_dvc_repo()

dvc_logger = logging.getLogger("dvc")
dvc_logger.setLevel(os.getenv(env.DVCLIVE_LOGLEVEL, "WARNING").upper())

Expand All @@ -161,8 +165,8 @@ def _init_dvc(self):
return

self._baseline_rev = self._dvc_repo.scm.get_rev()

if self._save_dvc_exp:
self._exp_name = get_random_exp_name(self._dvc_repo.scm, self._baseline_rev)
mark_dvclive_only_started()
self._include_untracked.append(self.dir)

Expand All @@ -177,23 +181,6 @@ def _init_studio(self):
logger.debug("Skipping `studio` report `start` and `done` events.")
self._studio_events_to_skip.add("start")
self._studio_events_to_skip.add("done")
elif self._dvc_repo is None:
logger.warning(
"Can't connect to Studio without a DVC Repo."
"\nYou can create a DVC Repo by calling `dvc init`."
)
self._studio_events_to_skip.add("start")
self._studio_events_to_skip.add("data")
self._studio_events_to_skip.add("done")
elif not self._save_dvc_exp:
logger.warning(
"Can't connect to Studio without creating a DVC experiment."
"\nIf you have a DVC Pipeline, run it with `dvc exp run`."
"\nIf you are using DVCLive alone, use `save_dvc_exp=True`."
)
self._studio_events_to_skip.add("start")
self._studio_events_to_skip.add("data")
self._studio_events_to_skip.add("done")
else:
response = post_live_metrics(
"start",
Expand Down
17 changes: 7 additions & 10 deletions src/dvclive/studio.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,17 @@ def _cast_to_numbers(datapoints):
return datapoints


def _rel_path(live, path):
    """Return ``path`` as a POSIX-style string relative to the project root.

    The root is ``live._dvc_repo.root_dir`` when a DVC repo is attached to
    ``live``; otherwise it is the current working directory.

    Raises:
        ValueError: If the resolved ``path`` is not located under the root
            (raised by ``Path.relative_to``).
    """
    absolute_path = Path(path).resolve()
    root = live._dvc_repo.root_dir if live._dvc_repo is not None else os.getcwd()
    # Resolve the root as well: ``absolute_path`` has symlinks resolved, so an
    # unresolved root could make ``relative_to`` fail for the same directory.
    root = Path(root).resolve()
    return str(absolute_path.relative_to(root).as_posix())


def _adapt_plot_name(live, name):
    """Return the plot name with a root-relative path.

    ``name`` is made relative via ``_rel_path``. When ``live.dvc_file`` exists
    on disk, the result is prefixed with the relative dvc file path and
    ``::``.
    """
    name = _rel_path(live, name)
    if os.path.isfile(live.dvc_file):
        dvc_file = _rel_path(live, live.dvc_file)
        name = f"{dvc_file}::{name}"
    return name

Expand Down Expand Up @@ -64,17 +63,15 @@ def _adapt_images(live):
def get_studio_updates(live):
if os.path.isfile(live.params_file):
params_file = live.params_file
if live._dvc_repo is not None:
params_file = _rel_path(params_file, live._dvc_repo.root_dir)
params_file = _rel_path(live, params_file)
params = {params_file: load_yaml(live.params_file)}
else:
params = {}

plots, metrics = parse_metrics(live)

metrics_file = live.metrics_file
if live._dvc_repo is not None:
metrics_file = _rel_path(metrics_file, live._dvc_repo.root_dir)
metrics_file = _rel_path(live, metrics_file)
metrics = {metrics_file: {"data": metrics}}

plots = {
Expand Down
11 changes: 5 additions & 6 deletions tests/test_dvc.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,19 +138,18 @@ def test_exp_save_on_end(tmp_dir, save, mocked_dvc_repo):
)
else:
assert live._baseline_rev is not None
assert live._exp_name is None
assert live._exp_name is not None
mocked_dvc_repo.experiments.save.assert_not_called()


def test_exp_save_skip_on_env_vars(tmp_dir, monkeypatch, mocker):
def test_exp_save_skip_on_env_vars(tmp_dir, monkeypatch, mocked_dvc_repo):
monkeypatch.setenv(DVC_EXP_BASELINE_REV, "foo")
monkeypatch.setenv(DVC_EXP_NAME, "bar")

with mocker.patch("dvclive.live.get_dvc_repo", return_value=None):
live = Live(save_dvc_exp=True)
live.end()
live = Live(save_dvc_exp=True)
live.end()

assert live._dvc_repo is None
assert live._dvc_repo is not None
assert live._baseline_rev == "foo"
assert live._exp_name == "bar"
assert live._inside_dvc_exp
Expand Down
106 changes: 99 additions & 7 deletions tests/test_studio.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,10 +299,9 @@ def test_post_to_studio_shorten_names(tmp_dir, mocked_dvc_repo, mocked_studio_po

@pytest.mark.studio()
def test_post_to_studio_inside_dvc_exp(
tmp_dir, mocker, monkeypatch, mocked_studio_post
tmp_dir, mocker, monkeypatch, mocked_studio_post, mocked_dvc_repo
):
mocked_post, _ = mocked_studio_post
mocker.patch("dvclive.live.get_dvc_repo", return_value=None)

monkeypatch.setenv(DVC_EXP_BASELINE_REV, "f" * 40)
monkeypatch.setenv(DVC_EXP_NAME, "bar")
Expand Down Expand Up @@ -398,11 +397,6 @@ def test_post_to_studio_inside_subdir_dvc_exp(
)


def test_post_to_studio_requires_exp(tmp_dir, mocked_dvc_repo, mocked_studio_post):
assert Live()._studio_events_to_skip == {"start", "data", "done"}
assert not Live(save_dvc_exp=True)._studio_events_to_skip


def test_get_dvc_studio_config_none(mocker):
mocker.patch("dvclive.live.get_dvc_repo", return_value=None)
live = Live()
Expand Down Expand Up @@ -485,3 +479,101 @@ def test_post_to_studio_message(tmp_dir, mocked_dvc_repo, mocked_studio_post):
},
timeout=(30, 5),
)


@pytest.mark.parametrize("exp_name", [True, False])
def test_post_to_studio_no_repo(tmp_dir, monkeypatch, mocked_studio_post, exp_name):
    """Check the start/data/done payloads posted to Studio.

    Runs without the ``mocked_dvc_repo`` fixture and is parametrized on
    whether ``DVC_EXP_NAME`` is set in the environment.
    """
    baseline_sha = "f" * 40
    monkeypatch.setenv(DVC_STUDIO_TOKEN, "STUDIO_TOKEN")
    monkeypatch.setenv(DVC_STUDIO_REPO_URL, "STUDIO_REPO_URL")
    monkeypatch.setenv(DVC_EXP_BASELINE_REV, baseline_sha)
    if exp_name:
        monkeypatch.setenv(DVC_EXP_NAME, "bar")

    live = Live(save_dvc_exp=True)
    live.log_param("fooparam", 1)

    dvc_path = Path(live.dvc_file).as_posix()
    metrics_path = Path(live.metrics_file).as_posix()
    params_path = Path(live.params_file).as_posix()
    foo_path = (Path(live.plots_dir) / Metric.subfolder / "foo.tsv").as_posix()
    plot_key = f"{dvc_path}::{foo_path}"

    mocked_post, _ = mocked_studio_post

    def assert_last_post(event_type, **extra):
        # Every event shares the same envelope; only ``type`` and the
        # data-specific fields passed through ``extra`` differ.
        expected_json = {
            "type": event_type,
            "repo_url": "STUDIO_REPO_URL",
            "baseline_sha": baseline_sha,
            "name": live._exp_name,
            "client": "dvclive",
        }
        expected_json.update(extra)
        mocked_post.assert_called_with(
            "https://0.0.0.0/api/live",
            json=expected_json,
            headers={
                "Authorization": "token STUDIO_TOKEN",
                "Content-type": "application/json",
            },
            timeout=(30, 5),
        )

    assert_last_post("start")

    # Two consecutive steps: foo=1 at step 0, then foo=2 at step 1.
    for step, value in enumerate((1, 2)):
        live.log_metric("foo", value)

        live.next_step()
        assert_last_post(
            "data",
            step=step,
            metrics={metrics_path: {"data": {"step": step, "foo": value}}},
            params={params_path: {"fooparam": 1}},
            plots={plot_key: {"data": [{"step": step, "foo": float(value)}]}},
        )

    live.end()
    assert_last_post("done")