1
1
import multiprocessing
2
2
import textwrap
3
3
import time
4
+ import traceback
4
5
from typing import TypedDict
5
6
6
7
import numpy as np
9
10
import streamlit as st
10
11
11
12
from core .data_types import MLC_DATA_TYPES
13
+ from core .data_types import mlc_to_str_data_type
12
14
from core .data_types import STR_DATA_TYPES
15
+ from core .data_types import str_to_mlc_data_type
13
16
from core .query_params import expand_record_set
14
17
from core .query_params import is_record_set_expanded
15
18
from core .state import Field
@@ -34,7 +37,16 @@ class _Result(TypedDict):
34
37
exception : Exception | None
35
38
36
39
37
- @st .cache_data (show_spinner = "Generating the dataset..." )
40
+ @st .cache_data (
41
+ show_spinner = "Generating the dataset..." ,
42
+ hash_funcs = {
43
+ "mlcroissant.Metadata" : hash ,
44
+ "mlcroissant.Field" : hash ,
45
+ "mlcroissant.FileObject" : hash ,
46
+ "mlcroissant.FileSet" : hash ,
47
+ "mlcroissant.RecordSet" : hash ,
48
+ },
49
+ )
38
50
def _generate_data_with_timeout (record_set : RecordSet ) -> _Result :
39
51
"""Generates the data and waits at most _TIMEOUT_SECONDS."""
40
52
with multiprocessing .Manager () as manager :
@@ -59,7 +71,7 @@ def _generate_data(record_set: RecordSet, result: _Result) -> pd.DataFrame | Non
59
71
"""Generates the first _NUM_RECORDS records."""
60
72
try :
61
73
metadata : Metadata = st .session_state [Metadata ]
62
- if not metadata :
74
+ if metadata is None :
63
75
raise ValueError (
64
76
"The dataset is still incomplete. Please, go to the overview to see"
65
77
" errors."
@@ -81,8 +93,8 @@ def _generate_data(record_set: RecordSet, result: _Result) -> pd.DataFrame | Non
81
93
pass
82
94
df .append (record )
83
95
result ["df" ] = pd .DataFrame (df )
84
- except Exception as exception :
85
- result ["exception" ] = exception
96
+ except Exception :
97
+ result ["exception" ] = traceback . format_exc ()
86
98
87
99
88
100
def _handle_close_fields ():
@@ -148,6 +160,10 @@ def _handle_create_record_set():
148
160
metadata .add_record_set (RecordSet (name = "new-record-set" , description = "" ))
149
161
150
162
163
+ def _handle_remove_record_set (record_set_key : int ):
164
+ del st .session_state [Metadata ].record_sets [record_set_key ]
165
+
166
+
151
167
def _handle_fields_change (record_set_key : int , record_set : RecordSet ):
152
168
expand_record_set (record_set = record_set )
153
169
data_editor_key = _data_editor_key (record_set_key , record_set )
@@ -166,12 +182,13 @@ def _handle_fields_change(record_set_key: int, record_set: RecordSet):
166
182
elif new_field == FieldDataFrame .DESCRIPTION :
167
183
field .description = new_value
168
184
elif new_field == FieldDataFrame .DATA_TYPE :
169
- field .data_types = [new_value ]
185
+ field .data_types = [str_to_mlc_data_type ( new_value ) ]
170
186
for added_row in result ["added_rows" ]:
187
+ data_type = str_to_mlc_data_type (added_row .get (FieldDataFrame .DATA_TYPE ))
171
188
field = Field (
172
189
name = added_row .get (FieldDataFrame .NAME ),
173
190
description = added_row .get (FieldDataFrame .DESCRIPTION ),
174
- data_types = [added_row . get ( FieldDataFrame . DATA_TYPE ) ],
191
+ data_types = [data_type ],
175
192
source = mlc .Source (),
176
193
references = mlc .Source (),
177
194
)
@@ -290,7 +307,7 @@ def _render_left_panel():
290
307
# TODO(https://github.com/mlcommons/croissant/issues/350): Allow to display
291
308
# several data types, not only the first.
292
309
data_types = [
293
- field .data_types [0 ] if field .data_types else None
310
+ mlc_to_str_data_type ( field .data_types [0 ]) if field .data_types else None
294
311
for field in record_set .fields
295
312
]
296
313
fields = pd .DataFrame (
@@ -359,6 +376,14 @@ def _render_left_panel():
359
376
on_click = _handle_on_click_field ,
360
377
args = (record_set_key , record_set ),
361
378
)
379
+ key = f"{ prefix } -delete-record-set"
380
+ st .button (
381
+ "⚠️ Delete RecordSet" ,
382
+ type = "primary" ,
383
+ key = key ,
384
+ on_click = _handle_remove_record_set ,
385
+ args = (record_set_key ,),
386
+ )
362
387
st .button (
363
388
"Create a new RecordSet" ,
364
389
key = f"create-new-record-set" ,
0 commit comments