Skip to content

Commit

Permalink
add description of files
Browse files Browse the repository at this point in the history
  • Loading branch information
hlibbabii committed Sep 17, 2022
1 parent bf3fd8a commit bee4b80
Show file tree
Hide file tree
Showing 16 changed files with 64 additions and 22 deletions.
3 changes: 3 additions & 0 deletions bohrruntime/appconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from bohrruntime.storageengine import StorageEngine
from bohrruntime.util.paths import create_fs

"""
This class handles saving and loading values to/from BOHR config
"""

@dataclass(frozen=True)
class AppConfig:
Expand Down
4 changes: 4 additions & 0 deletions bohrruntime/bohrconfigparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
from bohrruntime.tasktypes.labeling.core import LabelingTask
from bohrruntime.util.paths import create_fs

"""
Parsing of BOHR config (bohr.py file).
"""


def convert_proxy_to_dataset(dataset_proxy: proxies.Dataset) -> Dataset:
dataset_type = type(dataset_proxy).__name__
Expand Down
4 changes: 4 additions & 0 deletions bohrruntime/cli/bohr.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@
from bohrruntime.storageengine import StorageEngine
from bohrruntime.util.logging import verbosity

"""
Definition of CLI commands like `bohr repro`
"""

CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])


Expand Down
4 changes: 4 additions & 0 deletions bohrruntime/cli/bohr_internal.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
from bohrruntime.storageengine import StorageEngine
from bohrruntime.util.profiler import Profiler

"""
Implementation of internal CLI commands used by the pipeline manager to execute stages
"""

CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])


Expand Down
5 changes: 4 additions & 1 deletion bohrruntime/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
HeuristicURI,
)
from bohrruntime.pipeline import (
TemplateStage,
fetch_heuristics_if_needed,
get_params,
get_stage_list,
Expand All @@ -24,6 +23,10 @@
from bohrruntime.storageengine import StorageEngine
from bohrruntime.util.paths import AbsolutePath, create_fs

"""
Implementation of CLI commands, e.g. `bohr repro`
"""

logger = logging.getLogger(__name__)


Expand Down
4 changes: 4 additions & 0 deletions bohrruntime/datamodel/bohrconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
from bohrruntime.datamodel.task import Task


"""
Implementation of BohrConfig (which is parsed from bohr.py)
"""

@dataclass(frozen=True)
class BohrConfig:
bohr_runtime_version: str
Expand Down
3 changes: 3 additions & 0 deletions bohrruntime/datamodel/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@

DatapointList = Union[List[Artifact], List[Tuple[Artifact, Artifact]]]

"""
Implementation of the Dataset abstraction. Does not contain data, only metadata.
"""

@dataclass(frozen=True)
class Dataset(ABC):
Expand Down
3 changes: 3 additions & 0 deletions bohrruntime/datamodel/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@
# e.g. random or zero model, only the task determines how such model would behave
from bohrruntime.util.paths import AbsolutePath

"""
Implementation of Experiment concept
"""

@dataclass(frozen=True)
class Experiment:
Expand Down
4 changes: 4 additions & 0 deletions bohrruntime/datamodel/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
from fs.base import FS
from snorkel.analysis import Scorer

"""
BOHR's wrapper over snorkel's or other model
"""


@dataclass
class GroundTruthLabels:
Expand Down
4 changes: 4 additions & 0 deletions bohrruntime/datamodel/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@
from bohrruntime.tasktypes.labeling.lfs import HeuristicApplier, SnorkelHeuristicApplier


"""
Implementation of Task, PreparedDataset, and DatasetPreparator concepts
"""

class PreparedDataset(ABC):
def save(self, subfs: FS) -> None:
pass
Expand Down
5 changes: 5 additions & 0 deletions bohrruntime/datasource.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,11 @@

from bohrruntime.datamodel.dataset import Dataset

"""
Code related to loading datasets from Artifact Explorer or from local file system.
When datasets are cached, this code is bypassed.
"""


class DataSource:
def get_connection(self):
Expand Down
3 changes: 3 additions & 0 deletions bohrruntime/heuristics.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ def always_non_bubfix(commit: Commit) -> OneOrManyLabels:
return CommitLabel.NonBugFix
"""

"""
This file contains code related to locating and loading heuristics
"""

def get_template_heuristic() -> str:
return TEMPLATE_HEURISTIC
Expand Down
5 changes: 5 additions & 0 deletions bohrruntime/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@
from bohrruntime.tasktypes.labeling.lfs import SnorkelHeuristicApplier # TODO!!
from bohrruntime.util.paths import AbsolutePath, normalize_paths

"""
Implements classes for each stage of the pipeline and their conversion to pipeline manager config
(DVC hardcoded as a pipeline manager for now)
"""

logger = logging.getLogger(__name__)


Expand Down
8 changes: 5 additions & 3 deletions bohrruntime/stages.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
from typing import List, Optional
from typing import List

import numpy as np
import pandas as pd
from bohrapi.core import Heuristic
from bohrlabels.core import Label, OneOrManyLabels
from tqdm import tqdm

from bohrruntime.datamodel.dataset import Dataset
Expand All @@ -19,6 +17,10 @@
from bohrruntime.storageengine import StorageEngine
from bohrruntime.tasktypes.labeling.lfs import HeuristicApplier

"""
Implementation of stages of BOHR lifecycle without task-specific details
"""


def load_dataset(dataset: Dataset, storage_engine: StorageEngine) -> None:
print(f"Loading dataset: {dataset.id}")
Expand Down
4 changes: 4 additions & 0 deletions bohrruntime/storageengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@

logger = logging.getLogger(__name__)

"""
Handles saving and loading of inputs and outputs of all stages to a (possibly virtual) file system
"""


@dataclass
class BohrPathStructure:
Expand Down
23 changes: 5 additions & 18 deletions bohrruntime/testtools.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from dataclasses import dataclass, field
from typing import List, Optional, Tuple, Type, Union
from typing import List, Optional, Tuple, Union

from bohrapi.artifacts import Commit
from bohrapi.core import Artifact, ArtifactType, HeuristicObj
Expand All @@ -11,24 +10,12 @@
from bohrruntime.datamodel.task import Task
from bohrruntime.heuristics import HeuristicLoader, HeuristicURI
from bohrruntime.storageengine import BohrPathStructure, StorageEngine

# class StubTask(Task):
# def load_ground_truth_labels(self, func):
# return None
#
# def get_preparator(self) -> DatasetPreparator:
# return None
#
# def get_model_trainer(self) -> ModelTrainer:
# return None
#
# def calculate_heuristic_output_metrics(self, heuristic_outputs: HeuristicOutputs, label_series: np.ndarray = None) -> Dict[str, Any]:
# return {}
#
# def is_grouping_task(self) -> bool:
# return False
from bohrruntime.tasktypes.labeling.core import LabelingTask

"""
Stub datamodel objects for testing
"""


class StubArtifact(Artifact):
def __init__(self):
Expand Down

0 comments on commit bee4b80

Please sign in to comment.