-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* #232: Redesigned SQLStageInputOutput * Added docstring to class Dataset * Change key of dict Dependencies to object
- Loading branch information
Showing
9 changed files
with
76 additions
and
354 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
26 changes: 0 additions & 26 deletions
26
exasol/analytics/query_handler/graph/stage/sql/data_partition.py
This file was deleted.
Oops, something went wrong.
62 changes: 13 additions & 49 deletions
62
exasol/analytics/query_handler/graph/stage/sql/dataset.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,56 +1,20 @@ | ||
import dataclasses | ||
from enum import Enum | ||
from typing import Dict, List, Tuple, Union | ||
from dataclasses import dataclass, field | ||
from typing import List | ||
|
||
from exasol.analytics.query_handler.graph.stage.sql.data_partition import DataPartition | ||
from exasol.analytics.schema import Column | ||
from exasol.analytics.utils.data_classes_runtime_type_check import check_dataclass_types | ||
from exasol.analytics.query_handler.graph.stage.sql.dependency import Dependencies | ||
from exasol.analytics.schema import Column, TableLike | ||
|
||
DataPartitionName = Union[Enum, Tuple[Enum, int]] | ||
|
||
|
||
@dataclasses.dataclass(frozen=True) | ||
@dataclass(frozen=True) | ||
class Dataset: | ||
""" | ||
A Dataset consists of multiple data partitions and column lists which indicate the identifier, | ||
sample and target columns. The data partitions can be used to describe train and test sets. | ||
""" | ||
A Dataset consists of a TableLike, column lists indicating the | ||
identifier and other columns, and optional dependencies. | ||
data_partitions: Dict[DataPartitionName, DataPartition] | ||
The TableLike refers to a database table containing the actual data that | ||
can be used, for instance, in training or testing. | ||
""" | ||
table_like: TableLike | ||
identifier_columns: List[Column] | ||
sample_columns: List[Column] | ||
target_columns: List[Column] | ||
|
||
def __post_init__(self): | ||
check_dataclass_types(self) | ||
self._check_table_name() | ||
self._check_columns() | ||
|
||
def _check_table_name(self): | ||
all_table_like_names = { | ||
data_partition.table_like.name | ||
for data_partition in self.data_partitions.values() | ||
} | ||
if len(all_table_like_names) != len(self.data_partitions): | ||
raise ValueError( | ||
"The names of table likes of the data partitions should be different." | ||
) | ||
|
||
def _check_columns(self): | ||
all_columns = { | ||
column | ||
for data_partition in self.data_partitions.values() | ||
for column in data_partition.table_like.columns | ||
} | ||
all_data_partition_have_same_columns = all( | ||
len(data_partition.table_like.columns) == len(all_columns) | ||
for data_partition in self.data_partitions.values() | ||
) | ||
if not all_data_partition_have_same_columns: | ||
raise ValueError("Not all data partitions have the same columns.") | ||
if not all_columns.issuperset(self.sample_columns): | ||
raise ValueError("Not all sample columns in data partitions.") | ||
if not all_columns.issuperset(self.target_columns): | ||
raise ValueError("Not all target columns in data partitions.") | ||
if not all_columns.issuperset(self.identifier_columns): | ||
raise ValueError("Not all identifier columns in data partitions.") | ||
columns: List[Column] | ||
dependencies: Dependencies = field(default_factory=dict) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.