From cc8852827a338effc1488b66d14c7601d9d5a23f Mon Sep 17 00:00:00 2001 From: leej3 Date: Fri, 15 Mar 2024 20:06:33 +0000 Subject: [PATCH 1/6] add fbresearch_logger.py Add FBResearchLogger class from unmerged branch object-detection-example Add minimal docs and tests --- docs/source/handlers.rst | 1 + ignite/handlers/fbresearch_logger.py | 172 ++++++++++++++++++ .../ignite/handlers/test_fbresearch_logger.py | 58 ++++++ 3 files changed, 231 insertions(+) create mode 100644 ignite/handlers/fbresearch_logger.py create mode 100644 tests/ignite/handlers/test_fbresearch_logger.py diff --git a/docs/source/handlers.rst b/docs/source/handlers.rst index 348639ccf2a..4c312be8b45 100644 --- a/docs/source/handlers.rst +++ b/docs/source/handlers.rst @@ -54,6 +54,7 @@ Loggers visdom_logger wandb_logger + fbresearch_logger .. seealso:: diff --git a/ignite/handlers/fbresearch_logger.py b/ignite/handlers/fbresearch_logger.py new file mode 100644 index 00000000000..29af6b7cb25 --- /dev/null +++ b/ignite/handlers/fbresearch_logger.py @@ -0,0 +1,172 @@ +"""FBResearch logger and its helper handlers.""" + +import datetime + +import torch + +from ignite.engine import Engine, Events +from ignite.handlers import Timer + + +MB = 1024.0 * 1024.0 + + +class FBResearchLogger: + """Logs training and validation metrics for research purposes. + + This logger is designed to attach to an Ignite Engine and log various metrics + and system stats at configurable intervals, including learning rates, iteration + times, and GPU memory usage. + + Args: + logger (logging.Logger): The logger to use for output. + delimiter (str): The delimiter to use between metrics in the log output. + show_output (bool): Flag to enable logging of the output from the engine's process function. + + Examples: + .. code-block:: python + + import logging + from ignite.handlers.fbresearch_logger import * + + logger = FBResearchLogger(logger=logging.Logger(__name__), show_output=True) + logger.attach(trainer, name="Train", every=10, optimizer=my_optimizer) + """ + + def __init__(self, logger, delimiter=" ", show_output=False): + self.delimiter = delimiter + self.logger = logger + self.iter_timer = None + self.data_timer = None + self.show_output = show_output + + def attach(self, engine: Engine, name: str, every: int = 1, optimizer=None): + """Attaches all the logging handlers to the given engine. + + Args: + engine (Engine): The engine to attach the logging handlers to. + name (str): The name of the engine (e.g., "Train", "Validate") to include in log messages. + every (int): Frequency of iterations to log information. Logs are generated every 'every' iterations. + optimizer: The optimizer used during training to log current learning rates. 
+ """ + engine.add_event_handler(Events.EPOCH_STARTED, self.log_epoch_started, engine, name) + engine.add_event_handler(Events.ITERATION_COMPLETED(every=every), self.log_every, engine, optimizer=optimizer) + engine.add_event_handler(Events.EPOCH_COMPLETED, self.log_epoch_completed, engine, name) + engine.add_event_handler(Events.COMPLETED, self.log_completed, engine, name) + + self.iter_timer = Timer(average=True) + self.iter_timer.attach( + engine, + start=Events.EPOCH_STARTED, + resume=Events.ITERATION_STARTED, + pause=Events.ITERATION_COMPLETED, + step=Events.ITERATION_COMPLETED, + ) + self.data_timer = Timer(average=True) + self.data_timer.attach( + engine, + start=Events.EPOCH_STARTED, + resume=Events.GET_BATCH_STARTED, + pause=Events.GET_BATCH_COMPLETED, + step=Events.GET_BATCH_COMPLETED, + ) + + def log_every(self, engine, optimizer=None): + """ + Logs the training progress at regular intervals. + + Args: + engine (Engine): The training engine. + optimizer (torch.optim.Optimizer, optional): The optimizer used for training. Defaults to None. + """ + cuda_max_mem = "" + if torch.cuda.is_available(): + cuda_max_mem = f"GPU Max Mem: {torch.cuda.max_memory_allocated() / MB:.0f} MB" + + current_iter = engine.state.iteration % (engine.state.epoch_length + 1) + iter_avg_time = self.iter_timer.value() + + eta_seconds = iter_avg_time * (engine.state.epoch_length - current_iter) + + outputs = [] + if self.show_output: + output = engine.state.output + if isinstance(output, dict): + outputs += [f"{k}: {v:.4f}" for k, v in output.items()] + else: + outputs += [f"{v:.4f}" if isinstance(v, float) else f"{v}" for v in output] + + lrs = "" + if optimizer is not None: + if len(optimizer.param_groups) == 1: + lrs += f"lr: {optimizer.param_groups[0]['lr']:.5f}" + else: + for i, g in enumerate(optimizer.param_groups): + lrs += f"lr [g{i}]: {g['lr']:.5f}" + + msg = self.delimiter.join( + [ + f"Epoch [{engine.state.epoch}/{engine.state.max_epochs}]", + f"[{current_iter}/{engine.state.epoch_length}]:", + f"ETA: {datetime.timedelta(seconds=int(eta_seconds))}", + f"{lrs}", + ] + + outputs + + [ + f"Iter time: {iter_avg_time:.4f} s", + f"Data prep time: {self.data_timer.value():.4f} s", + cuda_max_mem, + ] + ) + self.logger.info(msg) + + def log_epoch_started(self, engine, name): + """ + Logs the start of an epoch. + + Args: + engine (Engine): The engine object. + name (str): The name of the epoch. + + """ + msg = f"{name}: start epoch [{engine.state.epoch}/{engine.state.max_epochs}]" + self.logger.info(msg) + + def log_epoch_completed(self, engine, name): + """ + Logs the completion of an epoch. + + Args: + engine (Engine): The engine object. + name (str): The name of the epoch. + + """ + epoch_time = engine.state.times[Events.EPOCH_COMPLETED.name] + epoch_info = f"Epoch [{engine.state.epoch}/{engine.state.max_epochs}]" if engine.state.max_epochs > 1 else "" + msg = self.delimiter.join( + [ + f"{name}: {epoch_info}", + f"Total time: {datetime.timedelta(seconds=int(epoch_time))}", + f"({epoch_time / engine.state.epoch_length:.4f} s / it)", + ] + ) + self.logger.info(msg) + + def log_completed(self, engine, name): + """ + Logs the completion of a run. + + Args: + engine (Engine): The engine object. + name (str): The name of the run. 
+ + """ + if engine.state.max_epochs > 1: + total_time = engine.state.times[Events.COMPLETED.name] + msg = self.delimiter.join( + [ + f"{name}: run completed", + f"Total time: {datetime.timedelta(seconds=int(total_time))}", + ] + ) + self.logger.info(msg) diff --git a/tests/ignite/handlers/test_fbresearch_logger.py b/tests/ignite/handlers/test_fbresearch_logger.py new file mode 100644 index 00000000000..b85bdcf2794 --- /dev/null +++ b/tests/ignite/handlers/test_fbresearch_logger.py @@ -0,0 +1,58 @@ +import logging +import re +from unittest.mock import MagicMock + +import pytest + +from ignite.engine import Engine, Events +from ignite.handlers.fbresearch_logger import FBResearchLogger # Adjust the import path as necessary + + +@pytest.fixture +def mock_engine(): + engine = Engine(lambda e, b: None) + engine.state.epoch = 1 + engine.state.max_epochs = 10 + engine.state.epoch_length = 100 + engine.state.iteration = 50 + return engine + + +@pytest.fixture +def mock_logger(): + return MagicMock(spec=logging.Logger) + + +@pytest.fixture +def fb_research_logger(mock_logger): + yield FBResearchLogger(logger=mock_logger, show_output=True) + + +def test_fbresearch_logger_initialization(mock_logger): + logger = FBResearchLogger(logger=mock_logger, show_output=True) + assert logger.logger == mock_logger + assert logger.show_output is True + + +def test_fbresearch_logger_attach(mock_engine, mock_logger): + logger = FBResearchLogger(logger=mock_logger, show_output=True) + logger.attach(mock_engine, name="Test", every=1) + assert mock_engine.has_event_handler(logger.log_every, Events.ITERATION_COMPLETED) + + +@pytest.mark.parametrize( + "output,expected_pattern", + [ + ({"loss": 0.456, "accuracy": 0.789}, r"loss. *0.456.*accuracy. *0.789"), + ((0.456, 0.789), r"0.456.*0.789"), + ([0.456, 0.789], r"0.456.*0.789"), + ], +) +def test_output_formatting(mock_engine, fb_research_logger, output, expected_pattern): + # Ensure the logger correctly formats and logs the output for each type + mock_engine.state.output = output + fb_research_logger.attach(mock_engine, name="Test", every=1) + mock_engine.fire_event(Events.ITERATION_COMPLETED) + + actual_output = fb_research_logger.logger.info.call_args_list[0].args[0] + assert re.search(expected_pattern, actual_output) From 1f6f20d1b8555dfa3e9a41cef008aaa83e625c92 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 26 Mar 2024 11:56:24 +0000 Subject: [PATCH 2/6] add some mypy fixes --- ignite/handlers/fbresearch_logger.py | 57 +++++++++++++++++----------- 1 file changed, 35 insertions(+), 22 deletions(-) diff --git a/ignite/handlers/fbresearch_logger.py b/ignite/handlers/fbresearch_logger.py index 29af6b7cb25..3c35262abec 100644 --- a/ignite/handlers/fbresearch_logger.py +++ b/ignite/handlers/fbresearch_logger.py @@ -1,6 +1,9 @@ """FBResearch logger and its helper handlers.""" import datetime +from typing import Any, Optional + +# from typing import Any, Dict, Optional, Union import torch @@ -33,14 +36,16 @@ class FBResearchLogger: logger.attach(trainer, name="Train", every=10, optimizer=my_optimizer) """ - def __init__(self, logger, delimiter=" ", show_output=False): + def __init__(self, logger: Any, delimiter: str = " ", show_output: bool = False): self.delimiter = delimiter - self.logger = logger - self.iter_timer = None - self.data_timer = None - self.show_output = show_output - - def attach(self, engine: Engine, name: str, every: int = 1, optimizer=None): + self.logger: Any = logger + self.iter_timer: Timer = Timer(average=True) + self.data_timer: Timer = 
Timer(average=True) + self.show_output: bool = show_output + + def attach( + self, engine: Engine, name: str, every: int = 1, optimizer: Optional[torch.optim.Optimizer] = None + ) -> None: """Attaches all the logging handlers to the given engine. Args: @@ -54,7 +59,7 @@ def attach(self, engine: Engine, name: str, every: int = 1, optimizer=None): engine.add_event_handler(Events.EPOCH_COMPLETED, self.log_epoch_completed, engine, name) engine.add_event_handler(Events.COMPLETED, self.log_completed, engine, name) - self.iter_timer = Timer(average=True) + self.iter_timer.reset() self.iter_timer.attach( engine, start=Events.EPOCH_STARTED, @@ -62,7 +67,7 @@ def attach(self, engine: Engine, name: str, every: int = 1, optimizer=None): pause=Events.ITERATION_COMPLETED, step=Events.ITERATION_COMPLETED, ) - self.data_timer = Timer(average=True) + self.data_timer.reset() self.data_timer.attach( engine, start=Events.EPOCH_STARTED, @@ -71,7 +76,7 @@ def attach(self, engine: Engine, name: str, every: int = 1, optimizer=None): step=Events.GET_BATCH_COMPLETED, ) - def log_every(self, engine, optimizer=None): + def log_every(self, engine: Engine, optimizer: Optional[torch.optim.Optimizer] = None) -> None: """ Logs the training progress at regular intervals. @@ -79,6 +84,7 @@ def log_every(self, engine, optimizer=None): engine (Engine): The training engine. optimizer (torch.optim.Optimizer, optional): The optimizer used for training. Defaults to None. """ + assert engine.state.epoch_length is not None cuda_max_mem = "" if torch.cuda.is_available(): cuda_max_mem = f"GPU Max Mem: {torch.cuda.max_memory_allocated() / MB:.0f} MB" @@ -89,12 +95,12 @@ def log_every(self, engine, optimizer=None): eta_seconds = iter_avg_time * (engine.state.epoch_length - current_iter) outputs = [] - if self.show_output: + if self.show_output and engine.state.output is not None: output = engine.state.output if isinstance(output, dict): outputs += [f"{k}: {v:.4f}" for k, v in output.items()] else: - outputs += [f"{v:.4f}" if isinstance(v, float) else f"{v}" for v in output] + outputs += [f"{v:.4f}" if isinstance(v, float) else f"{v}" for v in output] # type: ignore lrs = "" if optimizer is not None: @@ -120,7 +126,7 @@ def log_every(self, engine, optimizer=None): ) self.logger.info(msg) - def log_epoch_started(self, engine, name): + def log_epoch_started(self, engine: Engine, name: str) -> None: """ Logs the start of an epoch. @@ -132,37 +138,44 @@ def log_epoch_started(self, engine, name): msg = f"{name}: start epoch [{engine.state.epoch}/{engine.state.max_epochs}]" self.logger.info(msg) - def log_epoch_completed(self, engine, name): + def log_epoch_completed(self, engine: Engine, name: str) -> None: """ Logs the completion of an epoch. Args: - engine (Engine): The engine object. - name (str): The name of the epoch. + engine (Engine): The engine object that triggered the event. + name (str): The name of the event. 
+ Returns: + None """ epoch_time = engine.state.times[Events.EPOCH_COMPLETED.name] - epoch_info = f"Epoch [{engine.state.epoch}/{engine.state.max_epochs}]" if engine.state.max_epochs > 1 else "" + epoch_info = ( + f"Epoch [{engine.state.epoch}/{engine.state.max_epochs}]" + if engine.state.max_epochs > 1 + else "" # type: ignore + ) msg = self.delimiter.join( [ f"{name}: {epoch_info}", - f"Total time: {datetime.timedelta(seconds=int(epoch_time))}", - f"({epoch_time / engine.state.epoch_length:.4f} s / it)", + f"Total time: {datetime.timedelta(seconds=int(epoch_time))}", # type: ignore + f"({epoch_time / engine.state.epoch_length:.4f} s / it)", # type: ignore ] ) self.logger.info(msg) - def log_completed(self, engine, name): + def log_completed(self, engine: Engine, name: str) -> None: """ Logs the completion of a run. Args: - engine (Engine): The engine object. + engine (Engine): The engine object representing the training/validation loop. name (str): The name of the run. """ - if engine.state.max_epochs > 1: + if engine.state.max_epochs and engine.state.max_epochs > 1: total_time = engine.state.times[Events.COMPLETED.name] + assert total_time is not None msg = self.delimiter.join( [ f"{name}: run completed", From 60717c21183e5b654a045ca93dd98c2e01c9a597 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 26 Mar 2024 14:02:20 +0000 Subject: [PATCH 3/6] fix docs bug --- ignite/handlers/fbresearch_logger.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ignite/handlers/fbresearch_logger.py b/ignite/handlers/fbresearch_logger.py index 3c35262abec..6e698f2ab5c 100644 --- a/ignite/handlers/fbresearch_logger.py +++ b/ignite/handlers/fbresearch_logger.py @@ -82,7 +82,7 @@ def log_every(self, engine: Engine, optimizer: Optional[torch.optim.Optimizer] = Args: engine (Engine): The training engine. - optimizer (torch.optim.Optimizer, optional): The optimizer used for training. Defaults to None. + optimizer (torch.optim.Optimizer): The optimizer used for training. Defaults to None. """ assert engine.state.epoch_length is not None cuda_max_mem = "" From a0c0b91d4ef718502ea00087ec12ef6160beeca3 Mon Sep 17 00:00:00 2001 From: John lee Date: Tue, 26 Mar 2024 14:19:24 +0000 Subject: [PATCH 4/6] Update ignite/handlers/fbresearch_logger.py Co-authored-by: vfdev --- ignite/handlers/fbresearch_logger.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ignite/handlers/fbresearch_logger.py b/ignite/handlers/fbresearch_logger.py index 6e698f2ab5c..0d3ea624c67 100644 --- a/ignite/handlers/fbresearch_logger.py +++ b/ignite/handlers/fbresearch_logger.py @@ -22,9 +22,9 @@ class FBResearchLogger: times, and GPU memory usage. Args: - logger (logging.Logger): The logger to use for output. - delimiter (str): The delimiter to use between metrics in the log output. - show_output (bool): Flag to enable logging of the output from the engine's process function. + logger: The logger to use for output. + delimiter: The delimiter to use between metrics in the log output. + show_output: Flag to enable logging of the output from the engine's process function. Examples: .. 
code-block:: python From e3a41d4e9a6f593b5f127fac7156b50a6173f506 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 26 Mar 2024 14:27:04 +0000 Subject: [PATCH 5/6] fix type error --- ignite/handlers/fbresearch_logger.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ignite/handlers/fbresearch_logger.py b/ignite/handlers/fbresearch_logger.py index 0d3ea624c67..21b1eb87773 100644 --- a/ignite/handlers/fbresearch_logger.py +++ b/ignite/handlers/fbresearch_logger.py @@ -152,8 +152,8 @@ def log_epoch_completed(self, engine: Engine, name: str) -> None: epoch_time = engine.state.times[Events.EPOCH_COMPLETED.name] epoch_info = ( f"Epoch [{engine.state.epoch}/{engine.state.max_epochs}]" - if engine.state.max_epochs > 1 - else "" # type: ignore + if engine.state.max_epochs > 1 # type: ignore + else "" ) msg = self.delimiter.join( [ From a4c069092ed5beaf2a7215465fd8483103354177 Mon Sep 17 00:00:00 2001 From: root Date: Tue, 26 Mar 2024 16:38:19 +0000 Subject: [PATCH 6/6] remove types from docstrings --- ignite/handlers/fbresearch_logger.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/ignite/handlers/fbresearch_logger.py b/ignite/handlers/fbresearch_logger.py index 21b1eb87773..a291138e48d 100644 --- a/ignite/handlers/fbresearch_logger.py +++ b/ignite/handlers/fbresearch_logger.py @@ -49,9 +49,9 @@ def attach( """Attaches all the logging handlers to the given engine. Args: - engine (Engine): The engine to attach the logging handlers to. - name (str): The name of the engine (e.g., "Train", "Validate") to include in log messages. - every (int): Frequency of iterations to log information. Logs are generated every 'every' iterations. + engine: The engine to attach the logging handlers to. + name: The name of the engine (e.g., "Train", "Validate") to include in log messages. + every: Frequency of iterations to log information. Logs are generated every 'every' iterations. optimizer: The optimizer used during training to log current learning rates. """ engine.add_event_handler(Events.EPOCH_STARTED, self.log_epoch_started, engine, name) @@ -81,8 +81,8 @@ def log_every(self, engine: Engine, optimizer: Optional[torch.optim.Optimizer] = Logs the training progress at regular intervals. Args: - engine (Engine): The training engine. - optimizer (torch.optim.Optimizer): The optimizer used for training. Defaults to None. + engine: The training engine. + optimizer: The optimizer used for training. Defaults to None. """ assert engine.state.epoch_length is not None cuda_max_mem = "" @@ -131,8 +131,8 @@ def log_epoch_started(self, engine: Engine, name: str) -> None: Logs the start of an epoch. Args: - engine (Engine): The engine object. - name (str): The name of the epoch. + engine: The engine object. + name: The name of the epoch. """ msg = f"{name}: start epoch [{engine.state.epoch}/{engine.state.max_epochs}]" @@ -143,8 +143,8 @@ def log_epoch_completed(self, engine: Engine, name: str) -> None: Logs the completion of an epoch. Args: - engine (Engine): The engine object that triggered the event. - name (str): The name of the event. + engine: The engine object that triggered the event. + name: The name of the event. Returns: None @@ -169,8 +169,8 @@ def log_completed(self, engine: Engine, name: str) -> None: Logs the completion of a run. Args: - engine (Engine): The engine object representing the training/validation loop. - name (str): The name of the run. + engine: The engine object representing the training/validation loop. + name: The name of the run. 
""" if engine.state.max_epochs and engine.state.max_epochs > 1: