Skip to content

Commit

Permalink
select bias covariates and add default plotting order
Browse files Browse the repository at this point in the history
  • Loading branch information
n-gilbertson committed Sep 6, 2024
1 parent d0df991 commit 68b857f
Show file tree
Hide file tree
Showing 2 changed files with 202 additions and 115 deletions.
66 changes: 37 additions & 29 deletions src/bopforge/categorical_pipeline/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
from argparse import ArgumentParser
from pathlib import Path

import bopforge.categorical_pipeline.functions as functions
import numpy as np
from bopforge.utils import fill_dict, ParseKwargs
from pplkit.data.interface import DataInterface

import bopforge.categorical_pipeline.functions as functions
from bopforge.utils import ParseKwargs, fill_dict

warnings.filterwarnings("ignore")


def pre_processing(result_folder: Path) -> None:
dataif = DataInterface(result=result_folder)
name = dataif.result.name
Expand Down Expand Up @@ -68,8 +70,8 @@ def fit_signal_model(result_folder: Path) -> None:
signal_model = functions.get_signal_model(settings, df)
signal_model.fit_model(outer_step_size=200, outer_max_iter=100)

df = functions.add_cols(df, signal_model)
df_coef = functions.get_coefs(all_settings, signal_model)
df = functions.convert_bc_to_em(df, signal_model)
df_coef = functions.get_coefs(df, all_settings, signal_model)

summary = functions.get_signal_model_summary(name, df, df_coef)

Expand All @@ -89,38 +91,40 @@ def fit_signal_model(result_folder: Path) -> None:
fig.savefig(dataif.result / "signal_model.pdf", bbox_inches="tight")


# def select_bias_covs(result_folder: Path) -> None:
# """Select the bias covariates. In this step, we first fit a linear model to
# get the prior strength of the bias-covariates. And then we use `CovFinder`
# to select important bias-covariates. A summary of the result will be
# generated and store in file `cov_finder_result.yaml`.
def select_bias_covs(result_folder: Path) -> None:
"""Select the bias covariates. In this step, we first fit a linear model to
get the prior strength of the bias-covariates. And then we use `CovFinder`
to select important bias-covariates. A summary of the result will be
generated and store in file `cov_finder_result.yaml`.
# Parameters
# ----------
# dataif
# Data interface in charge of file reading and writing.
Parameters
----------
dataif
Data interface in charge of file reading and writing.
# """
# dataif = DataInterface(result=result_folder)
# name = dataif.result.name
"""
dataif = DataInterface(result=result_folder)
name = dataif.result.name

# df = dataif.load_result(f"{name}.csv")
# df = df[df.is_outlier == 0].copy()
df = dataif.load_result(f"{name}.csv")
df = df[df.is_outlier == 0].copy()

# all_settings = dataif.load_result("settings.yaml")
# settings = all_settings["select_bias_covs"]
all_settings = dataif.load_result("settings.yaml")
settings = all_settings["select_bias_covs"]

# cov_finder_linear_model = dataif.load_result("signal_model.pkl")
cov_finder_linear_model = functions.get_cov_finder_linear_model(df)
cov_finder_linear_model.fit_model()

# cov_finder = functions.get_cov_finder(settings, cov_finder_linear_model)
# cov_finder.select_covs(verbose=True)
cov_finder = functions.get_cov_finder(settings, cov_finder_linear_model)
cov_finder.select_covs(verbose=True)

# cov_finder_result = functions.get_cov_finder_result(
# cov_finder_linear_model, cov_finder
# )
cov_finder_result = functions.get_cov_finder_result(
cov_finder_linear_model, cov_finder
)

# dataif.dump_result(cov_finder_result, "cov_finder_result.yaml")
# dataif.dump_result(cov_finder, "cov_finder.pkl")
dataif.dump_result(cov_finder_result, "cov_finder_result.yaml")
dataif.dump_result(cov_finder_linear_model, "cov_finder_linear_model.pkl")
dataif.dump_result(cov_finder, "cov_finder.pkl")


# def fit_linear_model(result_folder: Path) -> None:
Expand Down Expand Up @@ -222,7 +226,11 @@ def run(
def main(args=None) -> None:
parser = ArgumentParser(description="Categorical burden of proof pipeline.")
parser.add_argument(
"-i", "--input", type=os.path.abspath, required=True, help="Input data folder"
"-i",
"--input",
type=os.path.abspath,
required=True,
help="Input data folder",
)
parser.add_argument(
"-o",
Expand Down
Loading

0 comments on commit 68b857f

Please sign in to comment.