Merge branch 'master' into pomegranate_elimination
Roman223 authored Jul 19, 2023
2 parents 5b43176 + 384e443 commit 6f52a03
Showing 13 changed files with 1,912 additions and 692 deletions.
README.rst (5 changes: 5 additions & 0 deletions)

@@ -18,6 +18,8 @@
     - | |license|
   * - stats
     - | |downloads_stats| |downloads_monthly| |downloads_weekly|
+  * - style
+    - | |Black|
 
 Repository of a data modeling and analysis tool based on Bayesian networks
 
@@ -241,3 +243,6 @@ Citation
 
 .. |coverage| image:: https://codecov.io/github/aimclub/BAMT/branch/master/graph/badge.svg?token=fA4qsxGqTC
    :target: https://codecov.io/github/aimclub/BAMT
+
+.. |Black| image:: https://img.shields.io/badge/code%20style-black-000000.svg
+.. _Black: https://github.com/psf/black
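The new |Black| badge advertises the Black code style, which matches the quote and line-wrapping changes in the test files below. As an illustration only (not part of this commit), a file can be checked against Black's defaults from Python, assuming the black package is installed:

import black

# Format the source in memory with Black's default mode (double quotes,
# 88-character lines) and compare the result with what is on disk.
source = open("tests/MainTest.py").read()
formatted = black.format_str(source, mode=black.Mode())
print("Already Black-formatted:", formatted == source)

In practice the same check is usually run over the whole tree with Black's check mode rather than file by file.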
tests/BigbraveBNTest.py (26 changes: 13 additions & 13 deletions)
@@ -13,48 +13,48 @@
 
 encoder = preprocessing.LabelEncoder()
 discretizer = preprocessing.KBinsDiscretizer(
-    n_bins=5, encode='ordinal', strategy='uniform')
+    n_bins=5, encode="ordinal", strategy="uniform"
+)
 
-p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)])
+p = pp.Preprocessor([("encoder", encoder), ("discretizer", discretizer)])
 discretized_data, est = p.apply(data_discrete)
 
 info = p.info
 
 space_restrictor = BigBraveBN()
 
-space_restrictor.set_possible_edges_by_brave(
-    df=data_discrete)
+space_restrictor.set_possible_edges_by_brave(df=data_discrete)
 
 ps = space_restrictor.possible_edges
 
 bn_discrete = DiscreteBN()
 
 bn_discrete.add_nodes(descriptor=info)
 
-params = {'white_list': ps}
-bn_discrete.add_edges(discretized_data, scoring_function=(
-    'K2', K2Score), params=params)
+params = {"white_list": ps}
+bn_discrete.add_edges(discretized_data, scoring_function=("K2", K2Score), params=params)
 
 encoder = preprocessing.LabelEncoder()
 discretizer = preprocessing.KBinsDiscretizer(
-    n_bins=5, encode='ordinal', strategy='uniform')
+    n_bins=5, encode="ordinal", strategy="uniform"
+)
 
-p = pp.Preprocessor([('encoder', encoder), ('discretizer', discretizer)])
+p = pp.Preprocessor([("encoder", encoder), ("discretizer", discretizer)])
 discretized_data, est = p.apply(data_continuous)
 
 info = p.info
 
 space_restrictor = BigBraveBN()
 
-space_restrictor.set_possible_edges_by_brave(
-    df=data_continuous)
+space_restrictor.set_possible_edges_by_brave(df=data_continuous)
 
 ps = space_restrictor.possible_edges
 
 bn_continuous = ContinuousBN()
 
 bn_continuous.add_nodes(descriptor=info)
 
-params = {'white_list': ps}
+params = {"white_list": ps}
 bn_continuous.add_edges(
-    discretized_data, scoring_function=('K2', K2Score), params=params)
+    discretized_data, scoring_function=("K2", K2Score), params=params
+)
tests/LoadBN.py (27 changes: 19 additions & 8 deletions)
@@ -5,14 +5,23 @@
 import json
 
 hack_data = pd.read_csv("data/real data/hack_processed_with_rf.csv")[
-    ['Tectonic regime', 'Period', 'Lithology', 'Structural setting',
-     'Gross', 'Netpay', 'Porosity', 'Permeability', 'Depth']]
+    [
+        "Tectonic regime",
+        "Period",
+        "Lithology",
+        "Structural setting",
+        "Gross",
+        "Netpay",
+        "Porosity",
+        "Permeability",
+        "Depth",
+    ]
+]
 
 encoder = pp.LabelEncoder()
-discretizer = pp.KBinsDiscretizer(
-    n_bins=5, encode='ordinal', strategy='uniform')
+discretizer = pp.KBinsDiscretizer(n_bins=5, encode="ordinal", strategy="uniform")
 
-p = Preprocessor([('encoder', encoder), ('discretizer', discretizer)])
+p = Preprocessor([("encoder", encoder), ("discretizer", discretizer)])
 
 discretized_data, est = p.apply(hack_data)
 
@@ -21,9 +30,11 @@
 
 bn.add_nodes(info)
 
-structure = [("Tectonic regime", "Structural setting"),
-             ("Gross", "Netpay"),
-             ("Lithology", "Permeability")]
+structure = [
+    ("Tectonic regime", "Structural setting"),
+    ("Gross", "Netpay"),
+    ("Lithology", "Permeability"),
+]
 
 bn.set_structure(edges=structure)
 
tests/MainTest.py (70 changes: 38 additions & 32 deletions)
@@ -6,20 +6,29 @@
 from bamt.preprocessors import Preprocessor
 import bamt.networks as Networks
 
-'''
+"""
 Optional:
 You can also uncomment print() that you need.
-'''
+"""
 
 hack_data = pd.read_csv("../data/real data/hack_processed_with_rf.csv")
-cont_data = hack_data[['Gross', 'Netpay', 'Porosity',
-                       'Permeability', 'Depth']].dropna()
-disc_data = hack_data[['Tectonic regime', 'Period',
-                       'Lithology', 'Structural setting']].dropna()
-hybrid_data = hack_data[['Tectonic regime', 'Period',
-                         'Lithology', 'Structural setting',
-                         'Gross', 'Netpay', 'Porosity',
-                         'Permeability', 'Depth']].dropna()
+cont_data = hack_data[["Gross", "Netpay", "Porosity", "Permeability", "Depth"]].dropna()
+disc_data = hack_data[
+    ["Tectonic regime", "Period", "Lithology", "Structural setting"]
+].dropna()
+hybrid_data = hack_data[
+    [
+        "Tectonic regime",
+        "Period",
+        "Lithology",
+        "Structural setting",
+        "Gross",
+        "Netpay",
+        "Porosity",
+        "Permeability",
+        "Depth",
+    ]
+].dropna()
 
 cont_test_data = cont_data[cont_data.columns[:-1]]
 cont_target = cont_data[cont_data.columns[-1]]
@@ -29,27 +38,24 @@
 hybrid_target = hybrid_data[hybrid_data.columns[-1]]
 
 encoder = pp.LabelEncoder()
-discretizer = pp.KBinsDiscretizer(n_bins=5,
-                                  encode='ordinal',
-                                  strategy='uniform')
-p = Preprocessor([('encoder', encoder), ('discretizer', discretizer)])
+discretizer = pp.KBinsDiscretizer(n_bins=5, encode="ordinal", strategy="uniform")
+p = Preprocessor([("encoder", encoder), ("discretizer", discretizer)])
 
 # Discrete pipeline
 discretized_data, _ = p.apply(disc_data)
 disc_bn = Networks.DiscreteBN()
 info = p.info
 disc_bn.add_nodes(info)
-disc_bn.add_edges(data=discretized_data, scoring_function=('K2', K2Score))
+disc_bn.add_edges(data=discretized_data, scoring_function=("K2", K2Score))
 disc_bn.fit_parameters(data=disc_data)
 disc_bn.calculate_weights(discretized_data)
 disc_predicted_values = disc_bn.predict(test=disc_test_data)
-disc_predicted_values = pd.DataFrame.from_dict(
-    disc_predicted_values, orient='columns')
+disc_predicted_values = pd.DataFrame.from_dict(disc_predicted_values, orient="columns")
 synth_disc_data = disc_bn.sample(50)
 
-disc_bn.save('./disc_bn.json')
+disc_bn.save("./disc_bn.json")
 disc_bn2 = Networks.DiscreteBN()
-disc_bn2.load('./disc_bn.json')
+disc_bn2.load("./disc_bn.json")
 synth_disc_data2 = disc_bn2.sample(50)
 # print(disc_bn.weights)
 # print(disc_bn2.weights)
@@ -63,17 +69,16 @@
 cont_bn = Networks.ContinuousBN(use_mixture=True)
 info = p.info
 cont_bn.add_nodes(info)
-cont_bn.add_edges(data=discretized_data, scoring_function=('K2', K2Score))
+cont_bn.add_edges(data=discretized_data, scoring_function=("K2", K2Score))
 cont_bn.fit_parameters(data=cont_data)
 cont_bn.calculate_weights(discretized_data)
 cont_predicted_values = cont_bn.predict(test=cont_test_data)
-cont_predicted_values = pd.DataFrame.from_dict(
-    cont_predicted_values, orient='columns')
+cont_predicted_values = pd.DataFrame.from_dict(cont_predicted_values, orient="columns")
 synth_cont_data = cont_bn.sample(50)
 
-cont_bn.save('./cont_bn.json')
+cont_bn.save("./cont_bn.json")
 cont_bn2 = Networks.ContinuousBN(use_mixture=True)
-cont_bn2.load('./cont_bn.json')
+cont_bn2.load("./cont_bn.json")
 synth_cont_data2 = cont_bn2.sample(50)
 # print(cont_bn.weights)
 # print(cont_bn2.weights)
@@ -91,21 +96,22 @@
 info = p.info
 hybrid_bn.add_nodes(info)
 hybrid_bn2.add_nodes(info)
-hybrid_bn.add_edges(data=discretized_data, scoring_function=('K2', K2Score))
-hybrid_bn2.add_edges(data=discretized_data, scoring_function=('K2', K2Score))
+hybrid_bn.add_edges(data=discretized_data, scoring_function=("K2", K2Score))
+hybrid_bn2.add_edges(data=discretized_data, scoring_function=("K2", K2Score))
 hybrid_bn.fit_parameters(data=hybrid_data)
 hybrid_bn2.fit_parameters(data=hybrid_data)
 hybrid_bn.calculate_weights(discretized_data)
 hybrid_bn2.calculate_weights(discretized_data)
 hybrid_predicted_values = hybrid_bn.predict(test=hybrid_test_data)
 hybrid_predicted_values = pd.DataFrame.from_dict(
-    hybrid_predicted_values, orient='columns')
+    hybrid_predicted_values, orient="columns"
+)
 synth_hybrid_data = hybrid_bn.sample(50)
 synth_hybrid_data2 = hybrid_bn2.sample(50)
 
-hybrid_bn.save('./hybrid_bn.json')
+hybrid_bn.save("./hybrid_bn.json")
 hybrid_bn3 = Networks.HybridBN(use_mixture=True)
-hybrid_bn3.load('./hybrid_bn.json')
+hybrid_bn3.load("./hybrid_bn.json")
 synth_hybrid_data3 = hybrid_bn3.sample(50)
 # print(hybrid_bn.weights)
 # print(hybrid_bn2.weights)
@@ -124,9 +130,9 @@
 hybrid_bn = Networks.HybridBN(use_mixture=True)
 info = p.info
 hybrid_bn.add_nodes(info)
-hybrid_bn.add_edges(data=discretized_data, scoring_function=('K2', K2Score))
+hybrid_bn.add_edges(data=discretized_data, scoring_function=("K2", K2Score))
 hybrid_bn.fit_parameters(data=hybrid_data)
-hybrid_bn.save('./hybrid_bn_without_weights.json')
+hybrid_bn.save("./hybrid_bn_without_weights.json")
 hybrid_bn2 = Networks.HybridBN(use_mixture=True)
-hybrid_bn2.load('./hybrid_bn_without_weights.json')
+hybrid_bn2.load("./hybrid_bn_without_weights.json")
 # print(hybrid_bn2.weights)
tests/MetricsTest.py (34 changes: 16 additions & 18 deletions)
@@ -12,15 +12,16 @@
 
 h = pd.read_csv("data/real data/hack_processed_with_rf.csv")
 cols = [
-    'Tectonic regime',
-    'Period',
-    'Lithology',
-    'Structural setting',
-    'Gross',
-    'Netpay',
-    'Porosity',
-    'Permeability',
-    'Depth']
+    "Tectonic regime",
+    "Period",
+    "Lithology",
+    "Structural setting",
+    "Gross",
+    "Netpay",
+    "Porosity",
+    "Permeability",
+    "Depth",
+]
 h = h[cols]
 
 print(h.describe())
@@ -29,33 +30,30 @@
 print(f"Time elapsed for preparing data: {p2 - p1}")
 
 encoder = pp.LabelEncoder()
-discretizer = pp.KBinsDiscretizer(
-    n_bins=5,
-    encode='ordinal',
-    strategy='quantile')
+discretizer = pp.KBinsDiscretizer(n_bins=5, encode="ordinal", strategy="quantile")
 
-p = Preprocessor([('encoder', encoder), ('discretizer', discretizer)])
+p = Preprocessor([("encoder", encoder), ("discretizer", discretizer)])
 
 # -----------
 discrete_data, est = p.apply(h)
 info = p.info
 
 bn = Networks.HybridBN(has_logit=True)  # all may vary
 bn.add_nodes(descriptor=info)
-bn.add_edges(data=discrete_data, optimizer='HC', scoring_function=('MI',))
+bn.add_edges(data=discrete_data, optimizer="HC", scoring_function=("MI",))
 
 bn.get_info(as_df=False)
 t1 = time.time()
 bn.fit_parameters(data=h)
 t2 = time.time()
-print(f'PL elapsed: {t2 - t1}')
+print(f"PL elapsed: {t2 - t1}")
 
-columns = ['Lithology', 'Structural setting', 'Porosity', 'Depth']
+columns = ["Lithology", "Structural setting", "Porosity", "Depth"]
 validY = h[columns].dropna()
 validX = h.drop(columns, axis=1).dropna()
 
 time_1 = time.time()
 pred_param = bn.predict(validX, parall_count=3)
 time_2 = time.time()
 print(pred_param)
-print(f'Predict elapsed: {time_2 - time_1}')
+print(f"Predict elapsed: {time_2 - time_1}")