import base64
import logging
+import os
import pickle
import warnings
from abc import ABC
from copy import copy
+from enum import Enum, auto
from typing import List, Optional, Tuple, Union

import isort
@@ -35,6 +37,14 @@ class EmptyReportWarning(UserWarning):
    """Warning raised when a report contains no sections."""


+class ExportDataMode(str, Enum):
+    """Data export mode for the report."""
+
+    NONE = auto()
+    FILE = auto()
+    EMBED = auto()
+
+
class ReportBase(ABC):
    """
    Abstract base class for reports.
@@ -55,6 +65,8 @@ class ReportBase(ABC):
        "import plotly.io as pio",
    }

+    _DEFAULT_LOAD_DATA_CODE = "df = ... # TODO: Fill in code for loading data"
+
    def __init__(
        self,
        dataframe: pd.DataFrame,
@@ -84,27 +96,76 @@ def show(self) -> None:
        for section in self.sections:
            section.show(self.df)

+    def _export_data(
+        self, export_data_mode: ExportDataMode, notebook_file_path: Union[str, os.PathLike]
+    ) -> Tuple[str, List[str]]:
+        """
+        Generates code for loading exported data into the exported notebook.
+
+        Parameters
+        ----------
+        export_data_mode : ExportDataMode
+            The mode of exporting the data.
+        notebook_file_path : str or PathLike
+            Filepath of the exported notebook.
+
+        Returns
+        -------
+        Tuple[str, List[str]]
+            A tuple containing the code for loading the data and a list of imports required for
+            the code.
+        """
+        if export_data_mode == ExportDataMode.NONE:
+            return self._DEFAULT_LOAD_DATA_CODE, []
+        if export_data_mode == ExportDataMode.FILE:
+            parquet_file_name = str(notebook_file_path).removesuffix(".ipynb") + "-data.parquet"
+            self.df.to_parquet(parquet_file_name)
+            return f"df = pd.read_parquet('{parquet_file_name}')", ["import pandas as pd"]
+        assert export_data_mode == ExportDataMode.EMBED
+        buffer = base64.b85encode(self.df.to_parquet())
+        return (
+            code_dedent(
+                f"""
+                df_parquet = BytesIO(base64.b85decode({buffer}.decode()))
+                df = pd.read_parquet(df_parquet)"""
+            ),
+            ["import base64", "import pandas as pd", "from io import BytesIO"],
+        )
+
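Note: a minimal standalone sketch of the parquet/Base85 round trip that the EMBED branch above generates (not part of the diff; assumes pandas with a parquet engine such as pyarrow is installed):

import base64
from io import BytesIO

import pandas as pd

df = pd.DataFrame({"a": [1, 2, 3]})

# Encode, as _export_data does for ExportDataMode.EMBED: parquet bytes -> Base85 text.
buffer = base64.b85encode(df.to_parquet())

# Decode, as the generated notebook cell does: Base85 text -> parquet bytes -> DataFrame.
df_parquet = BytesIO(base64.b85decode(buffer.decode()))
df_roundtrip = pd.read_parquet(df_parquet)

assert df_roundtrip.equals(df)
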
    def export_notebook(
        self,
-        notebook_filepath: str,
+        notebook_filepath: Union[str, os.PathLike],
        dataset_name: str = "[INSERT DATASET NAME]",
        dataset_description: str = "[INSERT DATASET DESCRIPTION]",
+        export_data_mode: ExportDataMode = ExportDataMode.NONE,
    ) -> None:
        """Exports the report as an .ipynb file.

        Parameters
        ----------
-        notebook_filepath : str
+        notebook_filepath : str or PathLike
            Filepath of the exported notebook.
        dataset_name : str (default = "[INSERT DATASET NAME]")
            Name of dataset to be used in the title of the report.
        dataset_description : str (default = "[INSERT DATASET DESCRIPTION]")
            Description of dataset to be used below the title of the report.
+        export_data_mode : ExportDataMode (default = ExportDataMode.NONE)
+            Mode for exporting the data to the notebook.
+            If ExportDataMode.NONE, the data is not exported to the notebook.
+            If ExportDataMode.FILE, the data is exported to a parquet file
+            and loaded from there.
+            If ExportDataMode.EMBED, the data is embedded into the notebook
+            as a base64 string.
        """
+        load_data_code, load_data_imports = self._export_data(
+            export_data_mode, notebook_file_path=notebook_filepath
+        )
        # Generate a notebook containing dataset name and description
        self._warn_if_empty()
        nb = self._generate_notebook(
-            dataset_name=dataset_name, dataset_description=dataset_description
+            dataset_name=dataset_name,
+            dataset_description=dataset_description,
+            load_df=load_data_code,
+            extra_imports=load_data_imports,
        )

        # Save notebook to file
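Note: a hypothetical call-site sketch of the new export_data_mode parameter; Report stands in for a concrete ReportBase subclass and the import path is assumed rather than taken from this diff:

import pandas as pd

# Assumed imports; adjust to the package's actual module layout:
# from <package> import Report, ExportDataMode

df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
report = Report(df)  # hypothetical concrete ReportBase subclass

# NONE (default): the exported notebook keeps the "df = ..." placeholder cell.
report.export_notebook("report.ipynb")

# FILE: the data is written to "report-data.parquet" next to the notebook and read from there.
report.export_notebook("report.ipynb", export_data_mode=ExportDataMode.FILE)

# EMBED: the data is parquet-serialized, Base85-encoded, and embedded in a notebook cell.
report.export_notebook("report.ipynb", export_data_mode=ExportDataMode.EMBED)
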
@@ -113,9 +174,9 @@ def export_notebook(

    def _generate_notebook(
        self,
+        load_df: str,
        dataset_name: str = "[INSERT DATASET NAME]",
        dataset_description: str = "[INSERT DATASET DESCRIPTION]",
-        load_df: str = "df = ...",
        extra_imports: Optional[List[str]] = None,
        show_load_data: bool = True,
    ) -> nbf.NotebookNode: