Skip to content

Commit

Permalink
TODO Reset and split into multiple commits
Browse files Browse the repository at this point in the history
  • Loading branch information
janezd committed Feb 24, 2023
1 parent 5330c9f commit 22856f2
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 56 deletions.
15 changes: 11 additions & 4 deletions Orange/widgets/data/owtable.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
)
from AnyQt.QtCore import pyqtSlot as Slot

from orangewidget.utils.signals import LazyValue

import Orange.data
from Orange.data.storage import Storage
from Orange.data.table import Table
Expand All @@ -42,7 +44,8 @@
from Orange.widgets.utils import datacaching
from Orange.widgets.utils.localization import pl
from Orange.widgets.utils.annotated_data import (create_annotated_table,
ANNOTATED_DATA_SIGNAL_NAME)
ANNOTATED_DATA_SIGNAL_NAME,
lazy_annotated_table)
from Orange.widgets.utils.itemmodels import TableModel
from Orange.widgets.utils.state_summary import format_summary_details

Expand Down Expand Up @@ -762,12 +765,16 @@ def select_vars(role):

# Send all data by default
if not rowsel:
selected_data = table
selected_data = LazyValue[Table](
lambda: print("selecting") or table[model.mapToSourceRows(...)],
length=model.rowCount(), domain=table.domain)
else:
selected_data = table.from_table(domain, table, rowsel)
selected_data = LazyValue[Table](
lambda: table.from_table(domain, table, rowsel),
length=len(rowsel), domain=table.domain)

self.Outputs.selected_data.send(selected_data)
self.Outputs.annotated_data.send(create_annotated_table(table, rowsel))
self.Outputs.annotated_data.send(lazy_annotated_table(table, rowsel))

def copy(self):
"""
Expand Down
26 changes: 13 additions & 13 deletions Orange/widgets/unsupervised/owhierarchicalclustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

from Orange.widgets import widget, gui, settings
from Orange.widgets.utils import itemmodels, combobox
from Orange.widgets.utils.annotated_data import (create_annotated_table,
from Orange.widgets.utils.annotated_data import (lazy_annotated_table,
ANNOTATED_DATA_SIGNAL_NAME)
from Orange.widgets.utils.widgetpreview import WidgetPreview
from Orange.widgets.visualize.utils.plotutils import AxisItem
Expand Down Expand Up @@ -778,34 +778,34 @@ def commit(self):

if not selected_indices:
self.Outputs.selected_data.send(None)
annotated_data = create_annotated_table(items, []) \
annotated_data = lazy_annotated_table(items, []) \
if self.selection_method == 0 and self.matrix.axis else None
self.Outputs.annotated_data.send(annotated_data)
return

selected_data = None

if isinstance(items, Orange.data.Table) and self.matrix.axis == 1:
# Select rows
c = np.zeros(self.matrix.shape[0])

for i, indices in enumerate(maps):
c[indices] = i
c[unselected_indices] = len(maps)

mask = c != len(maps)

data, domain = items, items.domain
attrs = domain.attributes
classes = domain.class_vars
metas = domain.metas

var_name = get_unique_names(domain, "Cluster")
values = [f"C{i + 1}" for i in range(len(maps))]

clust_var = Orange.data.DiscreteVariable(
var_name, values=values + ["Other"])
domain = Orange.data.Domain(attrs, classes, metas + (clust_var,))

# Select rows
c = np.zeros(self.matrix.shape[0])

for i, indices in enumerate(maps):
c[indices] = i
c[unselected_indices] = len(maps)

mask = c != len(maps)

data = items.transform(domain)
with data.unlocked(data.metas):
data.set_column(clust_var, c)
Expand All @@ -817,7 +817,7 @@ def commit(self):
selected_data.domain = Domain(
attrs, classes, metas + (clust_var, ))

annotated_data = create_annotated_table(data, selected_indices)
annotated_data = lazy_annotated_table(data, selected_indices)

elif isinstance(items, Orange.data.Table) and self.matrix.axis == 0:
# Select columns
Expand Down
34 changes: 30 additions & 4 deletions Orange/widgets/utils/annotated_data.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import numpy as np
from Orange.data import Domain, DiscreteVariable

from AnyQt.QtCore import Qt

from orangewidget.utils.signals import LazyValue

from Orange.data import Domain, DiscreteVariable, Table
from Orange.data.util import get_unique_names

ANNOTATED_DATA_SIGNAL_NAME = "Data"
Expand Down Expand Up @@ -30,16 +35,22 @@ def add_columns(domain, attributes=(), class_vars=(), metas=()):
return Domain(attributes, class_vars, metas)


def _table_with_annotation_column(data, values, column_data, var_name):
def _domain_with_annotation_column(data, values, var_name):
var = DiscreteVariable(get_unique_names(data.domain, var_name), values)
class_vars, metas = data.domain.class_vars, data.domain.metas
if not data.domain.class_vars:
class_vars += (var, )
column_data = column_data.reshape((len(data), ))
else:
metas += (var, )
return Domain(data.domain.attributes, class_vars, metas), var


def _table_with_annotation_column(data, values, column_data, var_name):
domain, var = _domain_with_annotation_column(data, values, var_name)
if not data.domain.class_vars:
column_data = column_data.reshape((len(data), ))
else:
column_data = column_data.reshape((len(data), 1))
domain = Domain(data.domain.attributes, class_vars, metas)
table = data.transform(domain)
with table.unlocked(table.Y if not data.domain.class_vars else table.metas):
table[:, var] = column_data
Expand All @@ -65,6 +76,14 @@ def create_annotated_table(data, selected_indices):
data, ("No", "Yes"), annotated, ANNOTATED_DATA_FEATURE_NAME)


def lazy_annotated_table(data, selected_indices):
    """
    Return a lazily evaluated annotated table.

    The result is a `LazyValue[Table]`. Its value is computed by calling
    `create_annotated_table(data, selected_indices)`. The length of `data`
    and the domain extended with the annotation column are passed to the
    lazy value up front, so they do not require building the table.

    :param data: data to annotate
    :param selected_indices: forwarded unchanged to `create_annotated_table`
    :return: lazy value wrapping the annotated table
    """
    def build_table():
        return create_annotated_table(data, selected_indices)

    # Only the domain is needed here; the annotation variable is discarded.
    annotated_domain = _domain_with_annotation_column(
        data, ("No", "Yes"), ANNOTATED_DATA_FEATURE_NAME)[0]
    return LazyValue[Table](
        build_table, length=len(data), domain=annotated_domain)


def create_groups_table(data, selection,
include_unselected=True,
var_name=ANNOTATED_DATA_FEATURE_NAME,
Expand All @@ -88,3 +107,10 @@ def create_groups_table(data, selection,
data = data[mask]
selection = selection[mask] - 1
return _table_with_annotation_column(data, values, selection, var_name)


def lazy_groups_table(data, selection, include_unselected=True,
                      var_name=ANNOTATED_DATA_FEATURE_NAME, values=None):
    """
    Return a lazily evaluated table with a group annotation column.

    The result is a `LazyValue[Table]`. Its value is computed by calling
    `create_groups_table` with exactly the arguments given here. No length
    or domain is passed to the lazy value.

    :param data: data to annotate
    :param selection: forwarded to `create_groups_table`
    :param include_unselected: forwarded to `create_groups_table`
    :param var_name: forwarded to `create_groups_table`
    :param values: forwarded to `create_groups_table`
    :return: lazy value wrapping the table with groups
    """
    def build_table():
        return create_groups_table(
            data, selection, include_unselected, var_name, values)

    return LazyValue[Table](build_table)
85 changes: 50 additions & 35 deletions Orange/widgets/utils/state_summary.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
from datetime import date
from html import escape
from typing import Union

from AnyQt.QtCore import Qt

from Orange.widgets.utils.localization import pl
from orangewidget.utils.signals import summarize, PartialSummary
from orangewidget.utils.signals import summarize, PartialSummary, LazyValue
from Orange.widgets.utils.itemmodels import TableModel
from Orange.widgets.utils.tableview import TableView
from Orange.widgets.utils.distmatrixmodel import \
DistMatrixModel, DistMatrixView

from Orange.data import (
StringVariable, DiscreteVariable, ContinuousVariable, TimeVariable,
Table
Table, Domain
)

from Orange.evaluation import Results
Expand Down Expand Up @@ -62,51 +63,49 @@ def format_variables_string(variables):


# `format` is a good name for the argument, pylint: disable=redefined-builtin
def format_summary_details(data, format=Qt.PlainText):
def format_summary_details(data: Union[Table, Domain],
format=Qt.PlainText, missing=None):
"""
A function that forms the entire descriptive part of the input/output
summary.
:param data: A dataset
:type data: Orange.data.Table
:type data: Orange.data.Table or Orange.data.Domain
:return: A formatted string
"""
if data is None:
return ""

if format == Qt.PlainText:
def b(s):
return s
else:
def b(s):
return f"<b>{s}</b>"

features_missing = ""
if data.X.size < COMPUTE_NANS_LIMIT:
features_missing = missing_values(data.get_nan_frequency_attribute())
n_features = len(data.domain.variables) + len(data.domain.metas)
name = getattr(data, "name", None)
if name == "untitled":
features_missing = "" if missing is None else missing_values(missing)
if isinstance(data, Domain):
domain = data
name = None

basic = f'{len(data):n} {pl(len(data), "instance")}, ' \
f'{n_features} {pl(n_features, "variable")}'

features = format_variables_string(data.domain.attributes)
basic = ""
else:
domain = data.domain
if not features_missing and data.X.size < COMPUTE_NANS_LIMIT:
features_missing = \
" " + missing_values(data.get_nan_frequency_attribute())
name = getattr(data, "name", None)
if name == "untitled":
name = None
basic = f'{len(data):n} {pl(len(data), "instance")}, '

features = format_variables_string(domain.attributes)
features = f'Features: {features} {features_missing}'

targets = format_variables_string(data.domain.class_vars)
targets = format_variables_string(domain.class_vars)
targets = f'Target: {targets}'

metas = format_variables_string(data.domain.metas)
metas = format_variables_string(domain.metas)
metas = f'Metas: {metas}'

if format == Qt.PlainText:
details = ""
if name:
details += f"{name}: "
details += f"{basic}\n{features}\n{targets}"
if data.domain.metas:
if domain.metas:
details += f"\n{metas}"
else:
descs = []
Expand All @@ -115,11 +114,11 @@ def b(s):
else:
descs.append(_nobr(basic))

if data.domain.variables:
if domain.variables:
descs.append(_nobr(features))
if data.domain.class_vars:
if domain.class_vars:
descs.append(_nobr(targets))
if data.domain.metas:
if domain.metas:
descs.append(_nobr(metas))

details = '<br/>'.join(descs)
Expand Down Expand Up @@ -173,16 +172,32 @@ def _nobr(s):


@summarize.register
def summarize_(data: Table): # pylint: disable=function-redefined
def previewer():
view = TableView(selectionMode=TableView.NoSelection)
view.setModel(TableModel(data))
return view

def summarize_table(data: Table):
return PartialSummary(
data.approx_len(),
format_summary_details(data, format=Qt.RichText),
previewer)
lambda: _table_previewer(data))


@summarize.register
def summarize_table(data: LazyValue[Table]):  # pylint: disable=function-redefined
    """
    Summarize a lazy table, avoiding its computation when possible.

    If the value is already cached, the summary of the actual value is
    returned. Otherwise the summary is assembled from whatever optional
    attributes the lazy value carries: `length` (shown as "?" if absent),
    `domain` and `missing`. The table itself is retrieved only when the
    previewer callable is invoked.

    :param data: lazy value wrapping a table
    :return: summary of the (possibly not yet computed) table
    """
    if data.is_cached:
        # The table already exists, so summarize the real thing.
        return summarize(data.get_value())

    length = getattr(data, "length", "?")
    if hasattr(data, "domain"):
        details = format_summary_details(
            data.domain, format=Qt.RichText,
            missing=getattr(data, "missing", None))
    else:
        details = "data available, but not prepared yet"

    def previewer():
        # `get_value` is called only when this callable is invoked.
        return _table_previewer(data.get_value())

    return PartialSummary(length, details, previewer)


def _table_previewer(data):
    """
    Create a table view without selection that shows `data`.

    :param data: data passed to `TableModel`
    :return: a `TableView` whose model wraps `data`
    """
    preview = TableView(selectionMode=TableView.NoSelection)
    model = TableModel(data)
    preview.setModel(model)
    return preview


@summarize.register
Expand Down

0 comments on commit 22856f2

Please sign in to comment.