Skip to content

Commit 465f186

Browse files
committed
feat: add option to embed data when exporting report to notebook
1 parent f7b00be commit 465f186

File tree

3 files changed

+95
-38371
lines changed

3 files changed

+95
-38371
lines changed

edvart/report.py

+65-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import base64
22
import logging
3+
import os
34
import pickle
45
import warnings
56
from abc import ABC
67
from copy import copy
8+
from enum import Enum, auto
79
from typing import List, Optional, Tuple, Union
810

911
import isort
@@ -35,6 +37,14 @@ class EmptyReportWarning(UserWarning):
3537
"""Warning raised when a report contains no sections."""
3638

3739

40+
class ExportDataMode(str, Enum):
    """
    Selects how the report's data is made available to an exported notebook.

    Members
    -------
    NONE
        The data is not exported; the notebook gets placeholder loading code.
    FILE
        The data is written to a parquet file which the notebook reads back.
    EMBED
        The data is serialized to parquet, base85-encoded and embedded in the notebook code.
    """

    # NOTE: because of the `str` mixin, `auto()` produces the string values "1", "2", "3"
    # in declaration order. Reordering the members therefore changes their values.
    NONE = auto()
    FILE = auto()
    EMBED = auto()
46+
47+
3848
class ReportBase(ABC):
3949
"""
4050
Abstract base class for reports.
@@ -55,6 +65,8 @@ class ReportBase(ABC):
5565
"import plotly.io as pio",
5666
}
5767

68+
_DEFAULT_LOAD_DATA_CODE = "df = ... # TODO: Fill in code for loading data"
69+
5870
def __init__(
5971
self,
6072
dataframe: pd.DataFrame,
@@ -84,27 +96,76 @@ def show(self) -> None:
8496
for section in self.sections:
8597
section.show(self.df)
8698

99+
def _export_data(
    self, export_data_mode: ExportDataMode, notebook_file_path: Union[str, os.PathLike]
) -> Tuple[str, List[str]]:
    """
    Generates code for loading exported data into the exported notebook.

    Parameters
    ----------
    export_data_mode : ExportDataMode
        The mode of exporting the data.
    notebook_file_path : str or PathLike
        Filepath of the exported notebook.

    Returns
    -------
    Tuple[str, List[str]]
        A tuple containing the code for loading the data and a list of imports required for
        the code.

    Raises
    ------
    ValueError
        If `export_data_mode` is not a supported export mode.
    """
    if export_data_mode == ExportDataMode.NONE:
        return self._DEFAULT_LOAD_DATA_CODE, []

    if export_data_mode == ExportDataMode.FILE:
        notebook_path = str(notebook_file_path)
        notebook_suffix = ".ipynb"
        # Remove the suffix as a whole. `str.rstrip(".ipynb")` would strip any trailing
        # characters from the set {., i, p, y, n, b}, e.g. "happy.ipynb" -> "ha".
        if notebook_path.endswith(notebook_suffix):
            notebook_path = notebook_path[: -len(notebook_suffix)]
        parquet_file_name = notebook_path + "-data.parquet"
        self.df.to_parquet(parquet_file_name)
        # `!r` emits a valid Python string literal even when the path contains
        # backslashes (Windows) or quote characters.
        return f"df = pd.read_parquet({parquet_file_name!r})", ["import pandas as pd"]

    if export_data_mode != ExportDataMode.EMBED:
        # Explicit check instead of `assert`, which is stripped when running with `-O`.
        raise ValueError(f"Unsupported export data mode: {export_data_mode!r}")

    # The base85 alphabet contains no quotes or backslashes, so the bytes repr interpolated
    # below is a valid bytes literal in the generated code.
    buffer = base64.b85encode(self.df.to_parquet())
    return (
        code_dedent(
            f"""
            df_parquet = BytesIO(base64.b85decode({buffer}.decode()))
            df = pd.read_parquet(df_parquet)"""
        ),
        ["import base64", "import pandas as pd", "from io import BytesIO"],
    )
133+
87134
def export_notebook(
88135
self,
89-
notebook_filepath: str,
136+
notebook_filepath: Union[str, os.PathLike],
90137
dataset_name: str = "[INSERT DATASET NAME]",
91138
dataset_description: str = "[INSERT DATASET DESCRIPTION]",
139+
export_data_mode: ExportDataMode = ExportDataMode.NONE,
92140
) -> None:
93141
"""Exports the report as an .ipynb file.
94142
95143
Parameters
96144
----------
97-
notebook_filepath : str
145+
notebook_filepath : str or PathLike
98146
Filepath of the exported notebook.
99147
dataset_name : str (default = "[INSERT DATASET NAME]")
100148
Name of dataset to be used in the title of the report.
101149
dataset_description : str (default = "[INSERT DATASET DESCRIPTION]")
102150
Description of dataset to be used below the title of the report.
151+
export_data_mode : ExportDataMode (default = ExportDataMode.NONE)
152+
Mode for exporting the data to the notebook.
153+
If ExportDataMode.NONE, the data is not exported to the notebook.
154+
If ExportDataMode.FILE, the data is exported to a parquet file
155+
and loaded from there.
156+
If ExportDataMode.EMBED, the data is embedded into the notebook
157+
as a base64 string.
103158
"""
159+
load_data_code, load_data_imports = self._export_data(
160+
export_data_mode, notebook_file_path=notebook_filepath
161+
)
104162
# Generate a notebook containing dataset name and description
105163
self._warn_if_empty()
106164
nb = self._generate_notebook(
107-
dataset_name=dataset_name, dataset_description=dataset_description
165+
dataset_name=dataset_name,
166+
dataset_description=dataset_description,
167+
load_df=load_data_code,
168+
extra_imports=load_data_imports,
108169
)
109170

110171
# Save notebook to file
@@ -113,9 +174,9 @@ def export_notebook(
113174

114175
def _generate_notebook(
115176
self,
177+
load_df: str,
116178
dataset_name: str = "[INSERT DATASET NAME]",
117179
dataset_description: str = "[INSERT DATASET DESCRIPTION]",
118-
load_df: str = "df = ...",
119180
extra_imports: Optional[List[str]] = None,
120181
show_load_data: bool = True,
121182
) -> nbf.NotebookNode:

examples/report-example.ipynb

+12-38,366
Large diffs are not rendered by default.

tests/test_report.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
import pathlib
12
import warnings
23
from contextlib import redirect_stdout
34

5+
import nbconvert
6+
import nbformat
47
import numpy as np
58
import pandas as pd
69

7-
from edvart.report import DefaultReport, Report
10+
from edvart.report import DefaultReport, ExportDataMode, Report
811
from edvart.report_sections.bivariate_analysis import BivariateAnalysis
912
from edvart.report_sections.section_base import Verbosity
1013
from edvart.report_sections.univariate_analysis import UnivariateAnalysis
@@ -90,3 +93,17 @@ def test_show():
9093
warnings.simplefilter("ignore", UserWarning)
9194
with redirect_stdout(None):
9295
report.show()
96+
97+
98+
def test_exported_notebook_executes(tmp_path: pathlib.Path):
    """Check that notebooks exported with embedded or file-based data execute without errors."""
    report = Report(dataframe=_get_test_df())

    report.add_overview()
    for export_data_mode in (ExportDataMode.EMBED, ExportDataMode.FILE):
        # The f-prefix was missing, so every mode wrote to the same literal
        # "export_{export_data_mode}.ipynb" file. The member name is used because it is
        # stable across Python versions and safe as a file name.
        export_path = tmp_path / f"export_{export_data_mode.name}.ipynb"
        report.export_notebook(export_path, export_data_mode=export_data_mode)

        notebook = nbformat.read(export_path, as_version=4)
        preprocessor = nbconvert.preprocessors.ExecutePreprocessor(timeout=60)

        # Raises if any cell of the exported notebook fails to execute.
        preprocessor.preprocess(notebook)

0 commit comments

Comments
 (0)