Skip to content

Commit

Permalink
Display RecordSets as a flat non-editable list.
Browse files Browse the repository at this point in the history
  • Loading branch information
marcenacp committed Nov 14, 2023
1 parent 59b8962 commit 6c5579b
Show file tree
Hide file tree
Showing 4 changed files with 84 additions and 35 deletions.
14 changes: 8 additions & 6 deletions editor/core/data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,18 @@

import numpy as np

import mlcroissant as mlc


def convert_dtype(dtype: Any):
    """Converts from NumPy/Pandas to Croissant data types.

    Args:
        dtype: A NumPy scalar type or dtype (for instance the dtype of a
            pandas column), or the builtin `object`.

    Returns:
        The matching `mlc.DataType` member: INTEGER for 32/64-bit integers,
        FLOAT for 32/64-bit floats, BOOL for booleans and TEXT for strings
        and generic `object` columns.

    Raises:
        NotImplementedError: If `dtype` is none of the supported types. The
            offending dtype is passed as the exception argument.
    """
    if dtype == np.int64 or dtype == np.int32:
        return mlc.DataType.INTEGER
    elif dtype == np.float64 or dtype == np.float32:
        return mlc.DataType.FLOAT
    elif dtype == np.bool_:
        return mlc.DataType.BOOL
    elif dtype == np.str_ or dtype == object:
        # `object` is treated as text; pandas commonly reports string columns
        # with that dtype.
        return mlc.DataType.TEXT
    else:
        raise NotImplementedError(dtype)
2 changes: 1 addition & 1 deletion editor/core/record_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ def infer_record_sets(file: FileObject | FileSet, names: set[str]) -> list[Recor
)
field = Field(
name=column,
data_types=convert_dtype(value),
data_types=[convert_dtype(value)],
source=source,
references=mlc.Source(),
)
Expand Down
5 changes: 5 additions & 0 deletions editor/cypress/e2e/uploadCsv.cy.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,5 +45,10 @@ describe('Editor loads a local CSV as a resource', () => {
// On the record set page, we see the record set.
cy.get('[data-testid="stMarkdownContainer"]').contains('Record sets').click()
cy.contains('base.csv_record_set')
// We also see the fields with the proper types.
cy.get('[data-testid="stDataFrameResizable"]').contains("column1")
cy.get('[data-testid="stDataFrameResizable"]').contains("https://schema.org/Text")
cy.get('[data-testid="stDataFrameResizable"]').contains("column2")
cy.get('[data-testid="stDataFrameResizable"]').contains("https://schema.org/Integer")
})
})
98 changes: 70 additions & 28 deletions editor/views/record_sets.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,80 @@
import streamlit as st

from core.state import Metadata
from core.state import RecordSet
import mlcroissant as mlc
from utils import DF_HEIGHT
from utils import needed_field

# Croissant data types offered as choices for a field's "Data type" column.
# Only the `mlc.DataType` members are listed; the raw schema.org URL strings
# they replaced are dropped so each type appears once.
DATA_TYPES = [
    mlc.DataType.TEXT,
    mlc.DataType.FLOAT,
    mlc.DataType.INTEGER,
    mlc.DataType.BOOL,
]


class FieldDataFrame:
    """Names of the columns in the pd.DataFrame for `fields`.

    The constants are used both as the DataFrame column labels and as the
    keys/labels of the matching Streamlit column configuration, so the two
    always stay in sync.
    """

    # Column holding each field's name.
    NAME = "Name"
    # Column holding each field's free-text description.
    DESCRIPTION = "Description"
    # Column holding each field's Croissant data type.
    DATA_TYPE = "Data type"


def render_record_sets():
    """Renders every RecordSet of the current metadata with its fields.

    Reads the `Metadata` stored in `st.session_state`. When it has no
    distribution, a hint is displayed and nothing else is rendered.
    Otherwise each record set gets an expander containing text inputs for
    its name and description, a checkbox for `is_enumeration`, and a table
    listing the name, description and first data type of each field.
    """
    metadata = st.session_state[Metadata]
    if not metadata.distribution:
        st.markdown("Please add resources first.")
        return
    record_set: RecordSet
    for record_set in metadata.record_sets:
        with st.expander(f"**{record_set.name}**", expanded=True):
            # NOTE(review): the values returned by the widgets below are not
            # read, so edits are not written back to the record set here.
            col1, col2 = st.columns([1, 3])
            col1.text_input(
                needed_field("Name"),
                placeholder="Name without special character.",
                key=f"{record_set.name}-name",
                value=record_set.name,
            )
            col2.text_input(
                "Description",
                placeholder="Provide a clear description of the RecordSet.",
                key=f"{record_set.name}-description",
                value=record_set.description,
            )
            st.checkbox(
                "Whether the RecordSet is an enumeration",
                key=f"{record_set.name}-is-enumeration",
                value=record_set.is_enumeration,
            )
            names = [field.name for field in record_set.fields]
            descriptions = [field.description for field in record_set.fields]
            # Only the first data type of a field is displayed. Fall back to
            # None when a field has none, so that one incomplete field does
            # not raise IndexError for the whole page.
            data_types = [
                field.data_types[0] if field.data_types else None
                for field in record_set.fields
            ]
            fields = pd.DataFrame({
                FieldDataFrame.NAME: names,
                FieldDataFrame.DESCRIPTION: descriptions,
                FieldDataFrame.DATA_TYPE: data_types,
            })
            st.data_editor(
                fields,
                height=DF_HEIGHT,
                use_container_width=True,
                column_config={
                    FieldDataFrame.NAME: st.column_config.TextColumn(
                        FieldDataFrame.NAME,
                        help="Name of the field",
                        required=True,
                    ),
                    FieldDataFrame.DESCRIPTION: st.column_config.TextColumn(
                        FieldDataFrame.DESCRIPTION,
                        help="Description of the field",
                        required=False,
                    ),
                    FieldDataFrame.DATA_TYPE: st.column_config.SelectboxColumn(
                        FieldDataFrame.DATA_TYPE,
                        help="The Croissant type",
                        options=DATA_TYPES,
                        required=True,
                    ),
                },
            )

0 comments on commit 6c5579b

Please sign in to comment.