Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

select bias covariates and add default plotting order #16

Merged
merged 1 commit into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 37 additions & 29 deletions src/bopforge/categorical_pipeline/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
from argparse import ArgumentParser
from pathlib import Path

import bopforge.categorical_pipeline.functions as functions
import numpy as np
from bopforge.utils import fill_dict, ParseKwargs
from pplkit.data.interface import DataInterface

import bopforge.categorical_pipeline.functions as functions
from bopforge.utils import ParseKwargs, fill_dict

warnings.filterwarnings("ignore")


def pre_processing(result_folder: Path) -> None:
dataif = DataInterface(result=result_folder)
name = dataif.result.name
Expand Down Expand Up @@ -68,8 +70,8 @@ def fit_signal_model(result_folder: Path) -> None:
signal_model = functions.get_signal_model(settings, df)
signal_model.fit_model(outer_step_size=200, outer_max_iter=100)

df = functions.add_cols(df, signal_model)
df_coef = functions.get_coefs(all_settings, signal_model)
df = functions.convert_bc_to_em(df, signal_model)
df_coef = functions.get_coefs(df, all_settings, signal_model)

summary = functions.get_signal_model_summary(name, df, df_coef)

Expand All @@ -89,38 +91,40 @@ def fit_signal_model(result_folder: Path) -> None:
fig.savefig(dataif.result / "signal_model.pdf", bbox_inches="tight")


# def select_bias_covs(result_folder: Path) -> None:
# """Select the bias covariates. In this step, we first fit a linear model to
# get the prior strength of the bias-covariates. And then we use `CovFinder`
# to select important bias-covariates. A summary of the result will be
# generated and store in file `cov_finder_result.yaml`.
def select_bias_covs(result_folder: Path) -> None:
"""Select the bias covariates. In this step, we first fit a linear model to
get the prior strength of the bias-covariates. And then we use `CovFinder`
to select important bias-covariates. A summary of the result will be
generated and store in file `cov_finder_result.yaml`.

# Parameters
# ----------
# dataif
# Data interface in charge of file reading and writing.
Parameters
----------
dataif
Data interface in charge of file reading and writing.

# """
# dataif = DataInterface(result=result_folder)
# name = dataif.result.name
"""
dataif = DataInterface(result=result_folder)
name = dataif.result.name

# df = dataif.load_result(f"{name}.csv")
# df = df[df.is_outlier == 0].copy()
df = dataif.load_result(f"{name}.csv")
df = df[df.is_outlier == 0].copy()

# all_settings = dataif.load_result("settings.yaml")
# settings = all_settings["select_bias_covs"]
all_settings = dataif.load_result("settings.yaml")
settings = all_settings["select_bias_covs"]

# cov_finder_linear_model = dataif.load_result("signal_model.pkl")
cov_finder_linear_model = functions.get_cov_finder_linear_model(df)
cov_finder_linear_model.fit_model()

# cov_finder = functions.get_cov_finder(settings, cov_finder_linear_model)
# cov_finder.select_covs(verbose=True)
cov_finder = functions.get_cov_finder(settings, cov_finder_linear_model)
cov_finder.select_covs(verbose=True)

# cov_finder_result = functions.get_cov_finder_result(
# cov_finder_linear_model, cov_finder
# )
cov_finder_result = functions.get_cov_finder_result(
cov_finder_linear_model, cov_finder
)

# dataif.dump_result(cov_finder_result, "cov_finder_result.yaml")
# dataif.dump_result(cov_finder, "cov_finder.pkl")
dataif.dump_result(cov_finder_result, "cov_finder_result.yaml")
dataif.dump_result(cov_finder_linear_model, "cov_finder_linear_model.pkl")
dataif.dump_result(cov_finder, "cov_finder.pkl")


# def fit_linear_model(result_folder: Path) -> None:
Expand Down Expand Up @@ -222,7 +226,11 @@ def run(
def main(args=None) -> None:
parser = ArgumentParser(description="Categorical burden of proof pipeline.")
parser.add_argument(
"-i", "--input", type=os.path.abspath, required=True, help="Input data folder"
"-i",
"--input",
type=os.path.abspath,
required=True,
help="Input data folder",
)
parser.add_argument(
"-o",
Expand Down
Loading
Loading