diff --git a/Bgolearn/BGOsampling.py b/Bgolearn/BGOsampling.py index 883bb4f..a960088 100755 --- a/Bgolearn/BGOsampling.py +++ b/Bgolearn/BGOsampling.py @@ -1,15 +1,18 @@ import inspect import os import time +from typing import Any import warnings import numpy as np import pandas as pd import copy -from typing import Union -from .BGOmax import Global_max -from .BGOmin import Global_min -from .BGOclf import Boundary -from .BGO_eval import BGO_Efficient +import datetime +from art import text2art +from sklearn.utils import resample +from .BgolearnFuns.BGOmax import Global_max +from .BgolearnFuns.BGOmin import Global_min +from .BgolearnFuns.BGOclf import Boundary +from .BgolearnFuns.BGO_eval import BGO_Efficient from sklearn.preprocessing import MinMaxScaler from sklearn.model_selection import LeaveOneOut from sklearn.metrics import r2_score @@ -18,19 +21,49 @@ from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import RBF, WhiteKernel from sklearn.model_selection import KFold +from sklearn.svm import SVR +from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor +from sklearn.neural_network import MLPRegressor class Bgolearn(object): - def fit(self,data_matrix, Measured_response, virtual_samples, Mission ='Regression', Classifier = 'GaussianProcess',noise_std = None, Kriging_model = None, opt_num = 1 ,min_search = True, CV_test = False, )-> Union[Boundary, Global_max, Global_min]: + def __init__(self) -> None: + os.makedirs('Bgolearn', exist_ok=True) + + now = datetime.datetime.now() + formatted_date_time = now.strftime('%Y-%m-%d %H:%M:%S') + print(text2art("Bgolearn")) + print('Package Name : Bgolearn') + print('Author : Bin CAO, HKUST(GZ)') + print('Intro : https://bgolearn.netlify.app/') + print('URL : https://github.com/Bin-Cao/Bgolearn') + print("Citation Format Suggestion:") + print('[Bin CAO et al]. "Active learning accelerates the discovery of high strength and high ductility lead-free solder alloys", [2023], [DOI : http://dx.doi.org/10.2139/ssrn.4686075].') + print('Executed on :',formatted_date_time, ' | Have a great day.') + print('='*80) + + + def fit(self,data_matrix, Measured_response, virtual_samples, Mission ='Regression', Classifier = 'GaussianProcess',noise_std = None, Kriging_model = None, opt_num = 1 ,min_search = True, CV_test = False, Dynamic_W = False,seed=42): """ ================================================================ - PACKAGE: Bayesian global optimization-learn (Bgolearn) . + PACKAGE: Bayesian global optimization-learn (Bgolearn) package. + Author: Bin CAO + Guangzhou Municipal Key Laboratory of Materials Informatics, Advanced Materials Thrust, + Hong Kong University of Science and Technology (Guangzhou), Guangzhou 511400, Guangdong, China ================================================================ + Please feel free to open issues on GitHub: + https://github.com/Bin-Cao/Bgolearn + or + contact Mr. Bin Cao (bcao686@connect.hkust-gz.edu.cn) + in case of any problems/comments/suggestions in using the code. + ================================================================ Thank you for choosing Bgolearn for material design. Bgolearn is developed to facilitate the application of machine learning in research. + Bgolearn is designed for optimizing single-target material properties. - If you need to perform multi-target optimization, here are two important reminders: + The BgoKit package is being developed to facilitate multi-task design. 
+ If you need to perform multi-target optimization, here are two friendly reminders: 1. Multi-tasks can be converted into a single task using domain knowledge. For example, you can use a weighted linear combination in the simplest situation. That is, y = w*y1 + y2... @@ -47,8 +80,9 @@ def fit(self,data_matrix, Measured_response, virtual_samples, Mission ='Regressi I am delighted to invite you to participate in the development of Bgolearn. If you have any issues or suggestions, please feel free to contact me at binjacobcao@gmail.com. ================================================================ - - Bin Cao, Advanced Materials Thrust, Hong Kong University of Science and Technology (Guangzhou). + Reference : + document : https://bgolearn.netlify.app/ + ================================================================ :param data_matrix: data matrix of training dataset, X . @@ -78,12 +112,19 @@ def fit(self,data_matrix, Measured_response, virtual_samples, Mission ='Regressi if noise_std is not None, a noise value will be estimated by maximum likelihood on training dataset. - :param Kriging_model (default None): a user defined callable Kriging model, has an attribute of + :param Kriging_model (default None): + str, Kriging_model = 'SVM', 'RF', 'AdaB', 'MLP' + The corresponding machine learning model will be applied: Support Vector Machine (SVM), + Random Forest (RF), AdaBoost (AdaB), or Multi-Layer Perceptron (MLP). + The estimation uncertainty will be determined by bootstrap sampling. + or + a user-defined callable Kriging model, which has an attribute of if user isn't applied one, Bgolearn will call a pre-set Kriging model atribute : input -> xtrain, ytrain, xtest ; output -> predicted mean and std of xtest - e.g. (take GaussianProcessRegressor in sklearn as an example): + + e.g. (take GaussianProcessRegressor in sklearn): class Kriging_model(object): def fit_pre(self,xtrain,ytrain,xtest): # instantiated model @@ -93,6 +134,20 @@ def fit_pre(self,xtrain,ytrain,xtest): mean,std = mdoel.predict(xtest,return_std=True) return mean,std + e.g. (multi-model estimations): + class Kriging_model(object): + def fit_pre(self,xtrain,ytrain,xtest): + # instantiated models + pre_1 = SVR(C=10).fit(xtrain,ytrain).predict(xtest) # model_1 + pre_2 = SVR(C=50).fit(xtrain,ytrain).predict(xtest) # model_2 + pre_3 = SVR(C=80).fit(xtrain,ytrain).predict(xtest) # model_3 + # model_1, model_2, model_3 can be changed to any ML models you desire + # defined the attribute's outputs + stacked_array = np.vstack((pre_1,pre_2,pre_3)) + means = np.mean(stacked_array, axis=0) + std = np.std(stacked_array, axis=0) + return means, std + :param opt_num: the number of recommended candidates for next iteration, default 1. :param min_search: default True -> searching the global minimum ; @@ -105,11 +160,17 @@ def fit_pre(self,xtrain,ytrain,xtest): :return: 1: array; potential of each candidate. 2: array/float; recommended candidate(s).
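+
+        e.g. (a minimal usage sketch; the file names, the column layout and the
+        choice of the EI acquisition below are illustrative assumptions, not fixed API):
+        import pandas as pd
+        from Bgolearn.BGOsampling import Bgolearn
+        data = pd.read_csv('train.csv')            # hypothetical training file
+        x = data.iloc[:, :-1]                      # feature matrix X
+        y = data.iloc[:, -1]                       # measured response y
+        vs = pd.read_csv('virtual.csv')            # hypothetical candidates, same columns as X
+        Mymodel = Bgolearn().fit(data_matrix=x, Measured_response=y,
+                                 virtual_samples=vs, opt_num=1, min_search=True)
+        EI_values, candidate = Mymodel.EI()        # potentials and recommended candidate(s)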
""" - + # Fit and transform the input data matrix + Xname = data_matrix.columns virtual_samples = preprocess_data(virtual_samples) data_matrix = preprocess_data(data_matrix) Measured_response = preprocess_data(Measured_response) + if Dynamic_W == False : + pass + elif Dynamic_W == True : + data_matrix, Measured_response = Resampling(data_matrix,Measured_response,min_search,seed ) + row_features = copy.deepcopy(virtual_samples) scaler = MinMaxScaler() @@ -181,6 +242,14 @@ def fit_pre(self,xtrain,ytrain,xtest,ret_std = 0.0): mean,std = mdoel.predict(xtest,return_std=True) return mean,std print('The internal model is instantiated with heterogenous noise') + elif type(Kriging_model) == str: + model_type = Kriging_model + class Kriging_model(object): + def fit_pre(self,xtrain,ytrain,xtest,): + mean,std = Bgolearn_model(xtrain,ytrain,xtest,model_type) + return mean,std + print('The internal model is assigned') + else: print('The external model is instantiated') pass @@ -292,9 +361,6 @@ def fit_pre(self,xtrain,ytrain,xtest,ret_std = 0.0): _MAE = mean_absolute_error(Y_true,_Y_pre) _R2 = r2_score(Y_true,_Y_pre) - - os.makedirs('Bgolearn', exist_ok=True) - print('Fitting goodness on training dataset: \n' + str(' RMSE = %f' % _RMSE) +' '+ str(' MAE = %f' % _MAE) +' '+ str(' R2 = %f' % _R2)) @@ -303,17 +369,25 @@ def fit_pre(self,xtrain,ytrain,xtest,ret_std = 0.0): - results_dataset.to_csv('./Bgolearn/predictionsBy{name}_{year}.{month}.{day}_{hour}.{minute}.csv'.format(name=docu_name(CV_test),year=namey, month=nameM, day=named, hour=nameh, + results_dataset.to_csv('./Bgolearn/predictions{name}_{year}_{month}_{day}_{hour}_{minute}.csv'.format(name=docu_name(CV_test),year=namey, month=nameM, day=named, hour=nameh, minute=namem),encoding='utf-8-sig') - _results_dataset.to_csv('./Bgolearn/predictionsOnTrainingDataset_{year}.{month}.{day}_{hour}.{minute}.csv'.format(year=namey, month=nameM, day=named, hour=nameh, + _results_dataset.to_csv('./Bgolearn/predictionsOnTrainingDataset_{year}_{month}_{day}_{hour}_{minute}.csv'.format(year=namey, month=nameM, day=named, hour=nameh, minute=namem),encoding='utf-8-sig') - V_Xmatrix.to_csv('./Bgolearn/predictionsOfVirtualSampels_{year}.{month}.{day}_{hour}.{minute}.csv'.format(year=namey, month=nameM, day=named, hour=nameh, + V_Xmatrix.to_csv('./Bgolearn/predictionsOfVirtualSampels_{year}_{month}_{day}_{hour}_{minute}.csv'.format(year=namey, month=nameM, day=named, hour=nameh, minute=namem),encoding='utf-8-sig') + + + arv_vs = pd.DataFrame(np.array(virtual_samples)) + arv_vs.columns = Xname + pre,_ = Kriging_model().fit_pre(data_matrix, Measured_response, virtual_samples) + arv_vs['Y'] = np.array(pre) + arv_vs.to_csv('./Bgolearn/PredictionsByBgolearn_{year}_{month}_{day}_{hour}_{minute}.csv'.format(year=namey, month=nameM, day=named, hour=nameh, + minute=namem), encoding='utf-8-sig') # BGO if min_search == True: @@ -331,8 +405,6 @@ def test(self,Ture_fun, Def_Domain,noise_std = 1e-5, Kriging_model = None, opt_n """ PACKAGE: Bayesian global optimization learn . - 6 Apr 2023, version 1.4, Bin Cao, ZheJiang LAB, Hangzhou, CHINA. - :param Ture_fun: the true function being evaluated. 
e.g., def function(X): X = np.array(X) return X * np.sin(X) + 10 @@ -456,3 +528,51 @@ def preprocess_data(data): data = np.array(data).reshape(-1, 1) data = np.array(data) return data + + +def Bgolearn_model(xtrain,ytrain,xtest,Kriging_model): + models = { + 'SVM': SVR(), + 'RF': RandomForestRegressor(), + 'AdaB': AdaBoostRegressor(), + 'MLP': MLPRegressor() + } + try: + Bgo_model = models[Kriging_model] + print('Bgolearn model : ', Bgo_model) + except KeyError: + raise ValueError('Kriging_model must be one of SVM, RF, AdaB, MLP; please check your input of param Kriging_model') + + all_predictions = [] + for _ in range(10): + # Perform Bootstrap sampling + X_bootstrap, y_bootstrap = resample(xtrain, ytrain) + predictions = Bgo_model.fit(X_bootstrap, y_bootstrap).predict(xtest) + # Store the predictions + all_predictions.append(predictions) + + # Convert the list of predictions to a NumPy array for easier calculations + all_predictions = np.array(all_predictions) + # Calculate mean and standard deviation across the bootstrap samples + mean = np.mean(all_predictions, axis=0) + std = np.std(all_predictions, axis=0) + return mean, std + +def Resampling(data_matrix,Measured_response,min_search,seed_): + + np.random.seed(seed_) + max_value = max(Measured_response) + min_value = min(Measured_response) + prob = (Measured_response - min_value) / (max_value - min_value) + if min_search == True: + prob = 1 - prob + cdf = np.cumsum(prob) + cdf_ = cdf / cdf[-1] + uniform_samples = np.random.random_sample(len(Measured_response)) + bootstrap_idx = cdf_.searchsorted(uniform_samples, side='right') + # searchsorted may return a scalar; ensure an array + bootstrap_idx = np.array(bootstrap_idx, copy=False) + print('Importance resampling is APPLIED','\n') + + + return data_matrix[bootstrap_idx], Measured_response[bootstrap_idx] \ No newline at end of file diff --git a/Bgolearn/BGO_eval.py b/Bgolearn/BgolearnFuns/BGO_eval.py similarity index 97% rename from Bgolearn/BGO_eval.py rename to Bgolearn/BgolearnFuns/BGO_eval.py index f7dac64..7745651 100755 --- a/Bgolearn/BGO_eval.py +++ b/Bgolearn/BgolearnFuns/BGO_eval.py @@ -126,7 +126,7 @@ def Trail(self,trails = 100, Max_inter = 500, tol = 0.1, ini_nb = None, UTFs = ' for j in range(Max_inter): # Max_inter = 500 is an enough large number BGO_mdoel = Global_min(self.Kriging_model,train_X, train_Y, self.Def_Domain, - self.opt_num, self.ret_noise) + self.opt_num, self.ret_noise,self.Def_Domain) _, return_x = self.Call(BGO_mdoel, UTFs, param_one, param_two) new_X = return_x new_Y = self.Ture_fun(new_X) @@ -157,7 +157,7 @@ def Trail(self,trails = 100, Max_inter = 500, tol = 0.1, ini_nb = None, UTFs = ' for j in range(Max_inter): # Max_inter = 500 is a enough large number BGO_mdoel = Global_max(self.Kriging_model,train_X, train_Y, self.Def_Domain, - self.opt_num, self.ret_noise) + self.opt_num, self.ret_noise,self.Def_Domain) _, return_x = self.Call(BGO_mdoel, UTFs, param_one, param_two) new_X = return_x new_Y = self.Ture_fun(new_X) @@ -225,7 +225,7 @@ def Opp_Cost(self,trails = 10, Max_inter = 500, threshold = 0.05, ini_nb = None, Iter += 1 # Max_inter = 500 is a enough large number BGO_mdoel = Global_min(self.Kriging_model,train_X, train_Y, self.Def_Domain, - self.opt_num, self.ret_noise) + self.opt_num, self.ret_noise,self.Def_Domain) _, return_x = self.Call(BGO_mdoel, UTFs, param_one, param_two) new_X = return_x new_Y = self.Ture_fun(new_X) @@ -258,7 +258,7 @@ def Opp_Cost(self,trails = 10, Max_inter = 500, threshold = 0.05, ini_nb = None, Iter +=1 # Max_inter = 500 is a enough large number BGO_mdoel = Global_max(self.Kriging_model,train_X, train_Y, 
self.Def_Domain, - self.opt_num, self.ret_noise) + self.opt_num, self.ret_noise,self.Def_Domain) _, return_x = self.Call(BGO_mdoel, UTFs, param_one, param_two) new_X = return_x new_Y = self.Ture_fun(new_X) @@ -340,7 +340,7 @@ def Pdf(self,trails = 200, Max_inter = 500, tol = 0.1, num_bins = 20, ini_nb = N for j in range(Max_inter): # Max_inter = 500 is a enough large number BGO_mdoel = Global_min(self.Kriging_model,train_X, train_Y, self.Def_Domain, - self.opt_num, self.ret_noise) + self.opt_num, self.ret_noise,self.Def_Domain) _, return_x = self.Call(BGO_mdoel, UTFs, param_one, param_two) new_X = return_x new_Y = self.Ture_fun(new_X) @@ -364,7 +364,7 @@ def Pdf(self,trails = 200, Max_inter = 500, tol = 0.1, num_bins = 20, ini_nb = N for j in range(Max_inter): # Max_inter = 500 is a enough large number BGO_mdoel = Global_min(self.Kriging_model,train_X, train_Y, self.Def_Domain, - self.opt_num, self.ret_noise) + self.opt_num, self.ret_noise,self.Def_Domain) _, return_x = self.Call(BGO_mdoel, Ref_UTFs, param_one, param_two) new_X = return_x new_Y = self.Ture_fun(new_X) @@ -398,7 +398,7 @@ def Pdf(self,trails = 200, Max_inter = 500, tol = 0.1, num_bins = 20, ini_nb = N for j in range(Max_inter): # Max_inter = 500 is a enough large number BGO_mdoel = Global_max(self.Kriging_model,train_X, train_Y, self.Def_Domain, - self.opt_num, self.ret_noise) + self.opt_num, self.ret_noise,self.Def_Domain) _, return_x = self.Call(BGO_mdoel, UTFs, param_one, param_two) new_X = return_x new_Y = self.Ture_fun(new_X) @@ -422,7 +422,7 @@ def Pdf(self,trails = 200, Max_inter = 500, tol = 0.1, num_bins = 20, ini_nb = N for j in range(Max_inter): # Max_inter = 500 is a enough large number BGO_mdoel = Global_max(self.Kriging_model,train_X, train_Y, self.Def_Domain, - self.opt_num, self.ret_noise) + self.opt_num, self.ret_noise,self.Def_Domain) _, return_x = self.Call(BGO_mdoel, Ref_UTFs, param_one, param_two) new_X = return_x new_Y = self.Ture_fun(new_X) @@ -505,7 +505,7 @@ def Count(self,trails = 100, Max_inter = 5, tol = 0.1, ini_nb = None, UTFs = 'EI for j in range(Max_inter): # Max_inter is the threshold BGO_mdoel = Global_min(self.Kriging_model,train_X, train_Y, self.Def_Domain, - self.opt_num, self.ret_noise) + self.opt_num, self.ret_noise,self.Def_Domain) _, return_x = self.Call(BGO_mdoel, UTFs, param_one, param_two) new_X = return_x new_Y = self.Ture_fun(new_X) @@ -534,7 +534,7 @@ def Count(self,trails = 100, Max_inter = 5, tol = 0.1, ini_nb = None, UTFs = 'EI for j in range(Max_inter): # Max_inter is the threshold BGO_mdoel = Global_max(self.Kriging_model, train_X, train_Y, self.Def_Domain, - self.opt_num, self.ret_noise) + self.opt_num, self.ret_noise,self.Def_Domain) _, return_x = self.Call(BGO_mdoel, UTFs, param_one, param_two) new_X = return_x new_Y = self.Ture_fun(new_X) diff --git a/Bgolearn/BGOclf.py b/Bgolearn/BgolearnFuns/BGOclf.py similarity index 96% rename from Bgolearn/BGOclf.py rename to Bgolearn/BgolearnFuns/BGOclf.py index 1e536b0..5efb3fc 100755 --- a/Bgolearn/BGOclf.py +++ b/Bgolearn/BgolearnFuns/BGOclf.py @@ -14,6 +14,9 @@ def __init__(self,model,data_matrix, Measured_response, virtual_samples, opt_num def Least_cfd(self,): + """ + Least Confidence + """ Lc = [] for i in range(len(self.probs)): max_pro = np.array(self.probs[i]).max() @@ -35,6 +38,9 @@ def Least_cfd(self,): return LcValue,np.array(return_x) def Margin_S(self,): + """ + Margin Sampling + """ Margin = [] for i in range(len(self.probs)): targ_list = list(self.probs[i]) @@ -61,6 +67,9 @@ def Margin_S(self,): def 
Entropy(self,): + """ + Entropy-based approach + """ Entropy_value = [] for i in range(len(self.probs)): Etp = 0 diff --git a/Bgolearn/BGOmax.py b/Bgolearn/BgolearnFuns/BGOmax.py similarity index 69% rename from Bgolearn/BGOmax.py rename to Bgolearn/BgolearnFuns/BGOmax.py index af891a8..a97ebcf 100755 --- a/Bgolearn/BGOmax.py +++ b/Bgolearn/BgolearnFuns/BGOmax.py @@ -1,11 +1,12 @@ -import copy +import copy,os import warnings import numpy as np from scipy.stats import norm +import multiprocess as mp +import multiprocessing + + + -# cal norm prob. -def norm_des(x): - return 1 / np.sqrt(2 * np.pi) * np.exp(- x ** 2 / 2) class Global_max(object): def __init__(self,Kriging_model,data_matrix, Measured_response, virtual_samples, opt_num, ret_noise,row_features): @@ -24,10 +25,14 @@ def __init__(self,Kriging_model,data_matrix, Measured_response, virtual_samples, self.ret_noise = ret_noise self.row_features = row_features warnings.filterwarnings('ignore') + os.environ["PYTHONWARNINGS"] = "ignore" def EI(self,): + """ + Expected Improvement algorithm + """ cur_optimal_value = self.Measured_response.max() print('current optimal is :', cur_optimal_value) EI_list = [] @@ -43,11 +48,13 @@ if self.opt_num == 1: EI_opt_index = np.random.choice(np.flatnonzero(EI_list == EI_list.max())) print('The next datum recomended by Expected Improvement : \n x = ', self.row_features[EI_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[EI_opt_index]) return_x.append(self.row_features[EI_opt_index]) elif type(self.opt_num) == int: - EI_opt_index = np.argpartition(EI_list, -self.opt_num)[-self.opt_num:] + EI_opt_index = np.argsort(EI_list)[-self.opt_num:][::-1] for j in range(len(EI_opt_index)): print('The {num}-th datum recomended by Expected Improvement : \n x = '.format(num =j+1), self.row_features[EI_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[EI_opt_index[j]]) return_x.append(self.row_features[EI_opt_index[j]]) else: print('The input para. opt_num must be an int') @@ -56,6 +63,9 @@ def EI_plugin(self,): + """ + Expected improvement with “plugin” + """ if self.ret_noise == 0: __train_ypre,_ = self.Kriging_model().fit_pre(self.data_matrix,self.Measured_response, self.data_matrix) else: @@ -75,11 +85,13 @@ if self.opt_num == 1: EIp_opt_index = np.random.choice(np.flatnonzero(EIp_list == EIp_list.max())) print('The next datum recomended by Expected Improvement with plugin : \n x = ', self.row_features[EIp_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[EIp_opt_index]) return_x.append(self.row_features[EIp_opt_index]) elif type(self.opt_num) == int: - EIp_opt_index = np.argpartition(EIp_list, -self.opt_num)[-self.opt_num:] + EIp_opt_index = np.argsort(EIp_list)[-self.opt_num:][::-1] for j in range(len(EIp_opt_index)): print('The {num}-th datum recomended by Expected Improvement with plugin : \n x = '.format(num =j+1), self.row_features[EIp_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[EIp_opt_index[j]]) return_x.append(self.row_features[EIp_opt_index[j]]) else: print('The input para. 
opt_num must be an int') @@ -88,6 +100,7 @@ def EI_plugin(self,): def Augmented_EI(self, alpha = 1, tao = 0): """ + Augmented Expected Improvement :param alpha: tradeoff coefficient, default 1 :param tao: noise standard deviation, default 0 """ @@ -113,11 +126,13 @@ def Augmented_EI(self, alpha = 1, tao = 0): if self.opt_num == 1: AEI_opt_index = np.random.choice(np.flatnonzero(AEI_list == AEI_list.max())) print('The next datum recomended by Augmented_EI : \n x = ', self.row_features[AEI_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[AEI_opt_index]) return_x.append(self.row_features[AEI_opt_index]) elif type(self.opt_num) == int: - AEI_opt_index = np.argpartition(AEI_list, -self.opt_num)[-self.opt_num:] + AEI_opt_index = np.argsort(AEI_list)[-self.opt_num:][::-1] for j in range(len(AEI_opt_index)): print('The {num}-th datum recomended by Augmented_EI : \n x = '.format(num =j+1), self.row_features[AEI_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[AEI_opt_index[j]]) return_x.append(self.row_features[AEI_opt_index[j]]) else: print('The input para. opt_num must be an int') @@ -126,6 +141,7 @@ def Augmented_EI(self, alpha = 1, tao = 0): def EQI(self, beta = 0.5,tao_new = 0): """ + Expected Quantile Improvement :param beta: beta quantile number, default 0.5 :param tao: noise standard deviation, default 0 """ @@ -153,11 +169,13 @@ def EQI(self, beta = 0.5,tao_new = 0): if self.opt_num == 1: EQI_opt_index = np.random.choice(np.flatnonzero(EQI_list == EQI_list.max())) print('The next datum recomended by Expected Quantile Improvement : \n x = ', self.row_features[EQI_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[EQI_opt_index]) return_x.append(self.row_features[EQI_opt_index]) elif type(self.opt_num) == int: - EQI_opt_index = np.argpartition(EQI_list, -self.opt_num)[-self.opt_num:] + EQI_opt_index = np.argsort(EQI_list)[-self.opt_num:][::-1] for j in range(len(EQI_opt_index)): print('The {num}-th datum recomended by Expected Quantile Improvement : \n x = '.format(num =j+1), self.row_features[EQI_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[EQI_opt_index[j]]) return_x.append(self.row_features[EQI_opt_index[j]]) else: print('The input para. 
opt_num must be an int') @@ -166,6 +184,9 @@ def EQI(self, beta = 0.5,tao_new = 0): def Reinterpolation_EI(self, ): + """ + Reinterpolation Expected Improvement + """ if self.ret_noise == 0: __update_y,_ = self.Kriging_model().fit_pre(self.data_matrix,self.Measured_response, self.data_matrix) else: @@ -189,11 +210,13 @@ def Reinterpolation_EI(self, ): if self.opt_num == 1: REI_opt_index = np.random.choice(np.flatnonzero(REI_list == REI_list.max())) print('The next datum recomended by Reinterpolation Expected Improvement : \n x = ', self.row_features[REI_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[REI_opt_index]) return_x.append(self.row_features[REI_opt_index]) elif type(self.opt_num) == int: - REI_opt_index = np.argpartition(REI_list, -self.opt_num)[-self.opt_num:] + REI_opt_index = np.argsort(REI_list)[-self.opt_num:][::-1] for j in range(len(REI_opt_index)): print('The {num}-th datum recomended by Reinterpolation Expected Improvement : \n x = '.format(num =j+1), self.row_features[REI_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[REI_opt_index[j]]) return_x.append(self.row_features[REI_opt_index[j]]) else: print('The input para. opt_num must be an int') @@ -203,6 +226,7 @@ def Reinterpolation_EI(self, ): def UCB(self, alpha=1): """ + Upper confidence bound :param alpha: tradeoff coefficient, default 1 """ UCB_list = np.array(self.virtual_samples_mean) + alpha * np.array(self.virtual_samples_std) @@ -211,11 +235,13 @@ def UCB(self, alpha=1): if self.opt_num == 1: UCB_opt_index = np.random.choice(np.flatnonzero(UCB_list == UCB_list.max())) print('The next datum recomended by Upper confidence bound : \n x = ', self.row_features[UCB_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[UCB_opt_index]) return_x.append(self.row_features[UCB_opt_index]) elif type(self.opt_num) == int: - UCB_opt_index = np.argpartition(UCB_list, -self.opt_num)[-self.opt_num:] + UCB_opt_index = np.argsort(UCB_list)[-self.opt_num:][::-1] for j in range(len(UCB_opt_index)): print('The {num}-th datum recomended by Upper confidence bound : \n x = '.format(num =j+1), self.row_features[UCB_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[UCB_opt_index[j]]) return_x.append(self.row_features[UCB_opt_index[j]]) else: print('The input para. opt_num must be an int') @@ -224,6 +250,7 @@ def UCB(self, alpha=1): def PoI(self, tao = 0): """ + Probability of Improvement :param tao: improvement ratio (>=0) , default 0 """ if tao < 0: @@ -243,11 +270,13 @@ def PoI(self, tao = 0): if self.opt_num == 1: PoI_opt_index = np.random.choice(np.flatnonzero(PoI_list == PoI_list.max())) print('The next datum recomended by Probability of Improvement : \n x = ', self.row_features[PoI_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[PoI_opt_index]) return_x.append(self.row_features[PoI_opt_index]) elif type(self.opt_num) == int: - PoI_opt_index = np.argpartition(PoI_list, -self.opt_num)[-self.opt_num:] + PoI_opt_index = np.argsort(PoI_list)[-self.opt_num:][::-1] for j in range(len(PoI_opt_index)): print('The {num}-th datum recomended by Probability of Improvement : \n x = '.format(num =j+1), self.row_features[PoI_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[PoI_opt_index[j]]) return_x.append(self.row_features[PoI_opt_index[j]]) else: print('The input para. 
opt_num must be an int') @@ -265,6 +294,7 @@ def Thompson_sampling(self,): def PES(self, sam_num = 500): """ + Predictive Entropy Search :param sam_num: number of optimal drawn from p(x*|D), D is support set, default 500 """ # sam_num: number of optimal drawn from p(x*|D),D is support set @@ -297,59 +327,114 @@ if self.opt_num == 1: PES_opt_index = np.random.choice(np.flatnonzero(PES_list == PES_list.max())) print('The next datum recomended by Predictive Entropy Search : \n x = ', self.row_features[PES_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[PES_opt_index]) return_x.append(self.row_features[PES_opt_index]) elif type(self.opt_num) == int: - PES_opt_index = np.argpartition(PES_list, -self.opt_num)[-self.opt_num:] + PES_opt_index = np.argsort(PES_list)[-self.opt_num:][::-1] for j in range(len(PES_opt_index)): print('The {num}-th datum recomended by Predictive Entropy Search : \n x = '.format(num =j+1), self.row_features[PES_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[PES_opt_index[j]]) return_x.append(self.row_features[PES_opt_index[j]]) else: print('The input para. opt_num must be an int') return PES_list,np.array(return_x) - def Knowledge_G(self,MC_num = 50): + + + + def __Knowledge_G_per_sample(self, func_bytes:bytes, MC_num:int, virtual_samples, v_sample_mean, v_sample_std, archive_sample_x, archive_sample_y, x_value, fea_num, ret_noise): + MC_batch_max = 0 + for _ in range(MC_num): + # generate y value + y_value = np.random.normal(loc = v_sample_mean, scale = v_sample_std) + # update the sample x and sample y + archive_sample_x[-len(x_value):] = x_value + if isinstance(y_value, float): + archive_sample_y[-1] = y_value + elif isinstance(y_value, np.ndarray): + archive_sample_y[-len(y_value):] = y_value + # calculate the post mean + if ret_noise == True: + # refit on the augmented data, passing the noise level + post_mean, _ = func_bytes.fit_pre(archive_sample_x.reshape(-1, fea_num), archive_sample_y, virtual_samples, v_sample_std) + else: + post_mean, _ = func_bytes.fit_pre(archive_sample_x.reshape(-1, fea_num), archive_sample_y, virtual_samples) + MC_batch_max += post_mean.max() + return MC_batch_max + + def Knowledge_G(self,MC_num = 1, Proc_num:int=None): """ - :param MC_num: number of Monte carol, default 50 + Calculate the Knowledge Gradient for Bayesian optimization. + + :param MC_num: Number of Monte Carlo simulations, default 1 (suggested range: 1-10). + :param Proc_num: Number of processors, default None (0) for single process. + :return: Knowledge Gradient values and recommended data points. 
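+
+        e.g. (a minimal sketch, assuming Mymodel is the object returned by
+        Bgolearn().fit(..., min_search=False); the variable names are illustrative):
+        KG_values, next_x = Mymodel.Knowledge_G(MC_num=5)              # single process
+        KG_values, next_x = Mymodel.Knowledge_G(MC_num=5, Proc_num=4)  # 4 worker processes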
+ + On Windows operating systems, please ensure that the call is placed inside an if __name__ == '__main__': guard, + e.g., + import multiprocessing as mp + if __name__ == '__main__': + # Freeze support for Windows + mp.freeze_support() + + # Call your function + Mymodel.Knowledge_G(MC_num=100,Proc_num=6) """ current_max = self.virtual_samples_mean.max() KD_list = [] - vir_num = len(self.virtual_samples) - for i in range(vir_num): - x_value = self.virtual_samples[i] - MC_batch_max = 0 - for j in range(MC_num): - y_value = np.random.normal(loc = self.virtual_samples_mean[i],scale = self.virtual_samples_std[i]) - archive_sample_x = copy.deepcopy(self.data_matrix) - archive_sample_y = copy.deepcopy(self.Measured_response) - - archive_sample_x = np.append(archive_sample_x, x_value) - archive_sample_y = np.append(archive_sample_y, y_value) - fea_num = len(self.data_matrix[0]) - # return a callable model - if self.ret_noise == 1: - post_mean, _ = self.Kriging_model().fit_pre(archive_sample_x.reshape(-1, fea_num),archive_sample_y,self.virtual_samples,self.virtual_samples_std[i]) - else: - post_mean, _ = self.Kriging_model().fit_pre(archive_sample_x.reshape(-1, fea_num),archive_sample_y,self.virtual_samples) - MC_batch_max += post_mean.max() - MC_times = i*MC_num + j+1 - if MC_times % 2000 == 0: - print('The {num}-th Monte carol simulation'.format(num = MC_times)) - MC_result = MC_batch_max / MC_num - KD_list.append( MC_result - current_max) + fea_num = len(self.data_matrix[0]) + archive_sample_x = np.append(self.data_matrix[:], self.virtual_samples[0]) + archive_sample_y = np.append(self.Measured_response[:], self.virtual_samples_mean[0]) + K_model = self.Kriging_model() + results = [] + i = 0 + if not Proc_num: + print('Execution using a single process') + for x_value, v_sample_mean, v_sample_std in zip(self.virtual_samples, self.virtual_samples_mean, self.virtual_samples_std): + MC_batch_max = self.__Knowledge_G_per_sample(K_model, MC_num, self.virtual_samples, v_sample_mean, v_sample_std, archive_sample_x, archive_sample_y, x_value, fea_num, self.ret_noise) + MC_result = MC_batch_max / MC_num + KD_list.append(MC_result - current_max ) + i += 1 + print('The Monte Carlo simulation has been performed {num} times.'.format(num = i * MC_num)) + else: + + # Ensure a safe multiprocessing start method before spawning workers + setup_multiprocessing() + print('Execution using multiple processes, process num = {}'.format(Proc_num)) + with mp.get_context("spawn").Pool(Proc_num) as pool: + results=[pool.apply_async(self.__Knowledge_G_per_sample, args=(K_model, MC_num, self.virtual_samples, v_sample_mean, v_sample_std, archive_sample_x, archive_sample_y, x_value, fea_num, self.ret_noise)) for x_value, v_sample_mean, v_sample_std in zip(self.virtual_samples, self.virtual_samples_mean, self.virtual_samples_std)] + for idx, rst in enumerate(results): + MC_batch_max = rst.get() + MC_result = MC_batch_max / MC_num + KD_list.append(MC_result - current_max ) + i += 1 + print('The Monte Carlo simulation has been performed {num} times.'.format(num = i * MC_num)) KD_list = np.array(KD_list) - + return_x = [] if self.opt_num == 1: KD_opt_index = np.random.choice(np.flatnonzero(KD_list == KD_list.max())) print('The next datum recomended by Knowledge Gradient : \n x = ', self.row_features[KD_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[KD_opt_index]) return_x.append(self.row_features[KD_opt_index]) elif type(self.opt_num) == int: - KD_opt_index = np.argpartition(KD_list, -self.opt_num)[-self.opt_num:] 
+ KD_opt_index = np.argsort(KD_list)[-self.opt_num:][::-1] for j in range(len(KD_opt_index)): print('The {num}-th datum recomended by Knowledge Gradient : \n x = '.format(num =j+1), self.row_features[KD_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[KD_opt_index[j]]) return_x.append(self.row_features[KD_opt_index[j]]) else: print('The input para. opt_num must be an int') return KD_list,np.array(return_x) + +def setup_multiprocessing(): + if multiprocessing.get_start_method() != 'fork': + try: + multiprocessing.set_start_method('fork') + except RuntimeError: + print('\'fork\' method not available, using the default') +# cal norm prob. +def norm_des(x): + return 1 / np.sqrt(2 * np.pi) * np.exp(- x ** 2 / 2) diff --git a/Bgolearn/BGOmin.py b/Bgolearn/BgolearnFuns/BGOmin.py similarity index 80% rename from Bgolearn/BGOmin.py rename to Bgolearn/BgolearnFuns/BGOmin.py index 4a6ff1a..732b68d 100755 --- a/Bgolearn/BGOmin.py +++ b/Bgolearn/BgolearnFuns/BGOmin.py @@ -1,12 +1,11 @@ -import copy, os +import copy,os import warnings import numpy as np +import pandas as pd from scipy.stats import norm import multiprocess as mp +import multiprocessing -# cal norm prob. -def norm_des(x): - return 1 / np.sqrt(2 * np.pi) * np.exp(- x ** 2 / 2) class Global_min(object): def __init__(self,Kriging_model,data_matrix, Measured_response, virtual_samples, opt_num, ret_noise, row_features): @@ -28,8 +27,10 @@ def __init__(self,Kriging_model,data_matrix, Measured_response, virtual_samples, os.environ["PYTHONWARNINGS"] = "ignore" - def EI(self,): + """ + Expected Improvement algorithm + """ cur_optimal_value = self.Measured_response.min() print('current optimal is :', cur_optimal_value) EI_list = [] @@ -44,17 +45,22 @@ if self.opt_num == 1: EI_opt_index = np.random.choice(np.flatnonzero(EI_list == EI_list.max())) print('The next datum recomended by Expected Improvement : \n x = ', self.row_features[EI_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[EI_opt_index]) return_x.append(self.row_features[EI_opt_index]) elif type(self.opt_num) == int: - EI_opt_index = np.argpartition(EI_list, -self.opt_num)[-self.opt_num:] + EI_opt_index = np.argsort(EI_list)[-self.opt_num:][::-1] for j in range(len(EI_opt_index)): print('The {num}-th datum recomended by Expected Improvement : \n x = '.format(num =j+1), self.row_features[EI_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[EI_opt_index[j]]) return_x.append(self.row_features[EI_opt_index[j]]) else: print('The input para. 
opt_num must be an int') return EI_list, np.array(return_x) def EI_plugin(self,): + """ + Expected improvement with “plugin” + """ if self.ret_noise == 0: __train_ypre,_ = self.Kriging_model().fit_pre(self.data_matrix,self.Measured_response, self.data_matrix) else: @@ -74,11 +80,13 @@ def EI_plugin(self,): if self.opt_num == 1: EIp_opt_index = np.random.choice(np.flatnonzero(EIp_list == EIp_list.max())) print('The next datum recomended by Expected Improvement with plugin : \n x = ', self.row_features[EIp_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[EIp_opt_index]) return_x.append(self.row_features[EIp_opt_index]) elif type(self.opt_num) == int: - EIp_opt_index = np.argpartition(EIp_list, -self.opt_num)[-self.opt_num:] + EIp_opt_index = np.argsort(EIp_list)[-self.opt_num:][::-1] for j in range(len(EIp_opt_index)): print('The {num}-th datum recomended by Expected Improvement with plugin : \n x = '.format(num =j+1), self.row_features[EIp_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[EIp_opt_index[j]]) return_x.append(self.row_features[EIp_opt_index[j]]) else: print('The input para. opt_num must be an int') @@ -87,6 +95,7 @@ def EI_plugin(self,): def Augmented_EI(self, alpha = 1, tao = 0): """ + Augmented Expected Improvement :param alpha: tradeoff coefficient, default 1 :param tao: noise standard deviation, default 0 """ @@ -112,11 +121,13 @@ def Augmented_EI(self, alpha = 1, tao = 0): if self.opt_num == 1: AEI_opt_index = np.random.choice(np.flatnonzero(AEI_list == AEI_list.max())) print('The next datum recomended by Augmented_EI : \n x = ', self.row_features[AEI_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[AEI_opt_index]) return_x.append(self.row_features[AEI_opt_index]) elif type(self.opt_num) == int: - AEI_opt_index = np.argpartition(AEI_list, -self.opt_num)[-self.opt_num:] + AEI_opt_index = np.argsort(AEI_list)[-self.opt_num:][::-1] for j in range(len(AEI_opt_index)): print('The {num}-th datum recomended by Augmented_EI : \n x = '.format(num =j+1), self.row_features[AEI_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[AEI_opt_index[j]]) return_x.append(self.row_features[AEI_opt_index[j]]) else: print('The input para. opt_num must be an int') @@ -125,6 +136,7 @@ def Augmented_EI(self, alpha = 1, tao = 0): def EQI(self, beta = 0.5,tao_new = 0): """ + Expected Quantile Improvement :param beta: beta quantile number, default 0.5 :param tao: noise standard deviation, default 0 """ @@ -152,11 +164,13 @@ def EQI(self, beta = 0.5,tao_new = 0): if self.opt_num == 1: EQI_opt_index = np.random.choice(np.flatnonzero(EQI_list == EQI_list.max())) print('The next datum recomended by Expected Quantile Improvement : \n x = ', self.row_features[EQI_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[EQI_opt_index]) return_x.append(self.row_features[EQI_opt_index]) elif type(self.opt_num) == int: - EQI_opt_index = np.argpartition(EQI_list, -self.opt_num)[-self.opt_num:] + EQI_opt_index = np.argsort(EQI_list)[-self.opt_num:][::-1] for j in range(len(EQI_opt_index)): print('The {num}-th datum recomended by Expected Quantile Improvement : \n x = '.format(num =j+1), self.row_features[EQI_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[EQI_opt_index[j]]) return_x.append(self.row_features[EQI_opt_index[j]]) else: print('The input para. 
opt_num must be an int') @@ -164,6 +178,9 @@ def EQI(self, beta = 0.5,tao_new = 0): return EQI_list,np.array(return_x) def Reinterpolation_EI(self, ): + """ + Reinterpolation Expected Improvement + """ if self.ret_noise == 0: __update_y,_ = self.Kriging_model().fit_pre(self.data_matrix,self.Measured_response, self.data_matrix) else: @@ -187,11 +204,13 @@ def Reinterpolation_EI(self, ): if self.opt_num == 1: REI_opt_index = np.random.choice(np.flatnonzero(REI_list == REI_list.max())) print('The next datum recomended by Reinterpolation Expected Improvement : \n x = ', self.row_features[REI_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[REI_opt_index]) return_x.append(self.row_features[REI_opt_index]) elif type(self.opt_num) == int: - REI_opt_index = np.argpartition(REI_list, -self.opt_num)[-self.opt_num:] + REI_opt_index = np.argsort(REI_list)[-self.opt_num:][::-1] for j in range(len(REI_opt_index)): print('The {num}-th datum recomended by Reinterpolation Expected Improvement : \n x = '.format(num =j+1), self.row_features[REI_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[REI_opt_index[j]]) return_x.append(self.row_features[REI_opt_index[j]]) else: print('The input para. opt_num must be an int') @@ -199,6 +218,7 @@ def Reinterpolation_EI(self, ): def UCB(self, alpha=1): """ + Upper confidence bound :param alpha: tradeoff coefficient, default 1 """ UCB_list = np.array(self.virtual_samples_mean) - alpha * np.array(self.virtual_samples_std) @@ -207,11 +227,13 @@ def UCB(self, alpha=1): if self.opt_num == 1: UCB_opt_index = np.random.choice(np.flatnonzero(UCB_list == UCB_list.min())) print('The next datum recomended by Upper confidence bound : \n x = ', self.row_features[UCB_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[UCB_opt_index]) return_x.append(self.row_features[UCB_opt_index]) elif type(self.opt_num) == int: - UCB_opt_index = np.argpartition(UCB_list, self.opt_num)[:self.opt_num] + UCB_opt_index = np.argsort(UCB_list)[:self.opt_num] for j in range(len(UCB_opt_index)): print('The {num}-th datum recomended by Upper confidence bound : \n x = '.format(num =j+1), self.row_features[UCB_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[UCB_opt_index[j]]) return_x.append(self.row_features[UCB_opt_index[j]]) else: print('The input para. 
opt_num must be an int') @@ -220,6 +242,7 @@ def PoI(self, tao = 0): """ + Probability of Improvement :param tao: improvement ratio (>=0) , default 0 """ if tao < 0: @@ -238,11 +261,13 @@ if self.opt_num == 1: PoI_opt_index = np.random.choice(np.flatnonzero(PoI_list == PoI_list.max())) print('The next datum recomended by Probability of Improvement : \n x = ', self.row_features[PoI_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[PoI_opt_index]) return_x.append(self.row_features[PoI_opt_index]) elif type(self.opt_num) == int: - PoI_opt_index = np.argpartition(PoI_list, -self.opt_num)[-self.opt_num:] + PoI_opt_index = np.argsort(PoI_list)[-self.opt_num:][::-1] for j in range(len(PoI_opt_index)): print('The {num}-th datum recomended by Probability of Improvement : \n x = '.format(num =j+1), self.row_features[PoI_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[PoI_opt_index[j]]) return_x.append(self.row_features[PoI_opt_index[j]]) else: print('The input para. opt_num must be an int') @@ -260,6 +285,7 @@ def Thompson_sampling(self,): def PES(self, sam_num = 500): """ + Predictive Entropy Search :param sam_num: number of optimal drawn from p(x*|D), D is support set, default 500 """ # sam_num: number of optimal drawn from p(x*|D),D is support set @@ -291,16 +317,19 @@ if self.opt_num == 1: PES_opt_index = np.random.choice(np.flatnonzero(PES_list == PES_list.max())) print('The next datum recomended by Predictive Entropy Search : \n x = ', self.row_features[PES_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[PES_opt_index]) return_x.append(self.row_features[PES_opt_index]) elif type(self.opt_num) == int: - PES_opt_index = np.argpartition(PES_list, -self.opt_num)[-self.opt_num:] + PES_opt_index = np.argsort(PES_list)[-self.opt_num:][::-1] for j in range(len(PES_opt_index)): print('The {num}-th datum recomended by Predictive Entropy Search : \n x = '.format(num =j+1), self.row_features[PES_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[PES_opt_index[j]]) return_x.append(self.row_features[PES_opt_index[j]]) else: print('The input para. opt_num must be an int') return PES_list,np.array(return_x) + def __Knowledge_G_per_sample(self, func_bytes:bytes, MC_num:int, virtual_samples, v_sample_mean, v_sample_std, archive_sample_x, archive_sample_y, x_value, fea_num, ret_noise): MC_batch_min = 0 for _ in range(MC_num): @@ -321,10 +350,23 @@ def __Knowledge_G_per_sample(self, func_bytes:bytes, MC_num:int, virtual_samples MC_batch_min += post_mean.min() return MC_batch_min - def Knowledge_G(self,MC_num = 50, Proc_num:int=None): + def Knowledge_G(self,MC_num = 1, Proc_num:int=None): """ - :param MC_num: number of Monte carol, default 50 - :param Proc_num: number of Processor, default None (0) + Calculate the Knowledge Gradient for Bayesian optimization. + + :param MC_num: Number of Monte Carlo simulations, default 1 (suggested range: 1-10). + :param Proc_num: Number of processors, default None (0) for single process. + :return: Knowledge Gradient values and recommended data points. 
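+
+        e.g. (a minimal sketch for minimization, assuming Mymodel is the object
+        returned by Bgolearn().fit(..., min_search=True); names are illustrative):
+        KG_values, next_x = Mymodel.Knowledge_G(MC_num=5, Proc_num=None)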
+ + On Windows operating systems, please ensure that the call is placed inside an if __name__ == '__main__': guard, + e.g., + import multiprocessing as mp + if __name__ == '__main__': + # Freeze support for Windows + mp.freeze_support() + + # Call your function + Mymodel.Knowledge_G(MC_num=100,Proc_num=6) """ current_min = self.virtual_samples_mean.min() KD_list = [] @@ -333,31 +375,54 @@ def Knowledge_G(self,MC_num = 50, Proc_num:int=None): archive_sample_y = np.append(self.Measured_response[:], self.virtual_samples_mean[0]) K_model = self.Kriging_model() results = [] + i = 0 if not Proc_num: + print('Execution using a single process') for x_value, v_sample_mean, v_sample_std in zip(self.virtual_samples, self.virtual_samples_mean, self.virtual_samples_std): MC_batch_min = self.__Knowledge_G_per_sample(K_model, MC_num, self.virtual_samples, v_sample_mean, v_sample_std, archive_sample_x, archive_sample_y, x_value, fea_num, self.ret_noise) MC_result = MC_batch_min / MC_num KD_list.append( current_min - MC_result) + i += 1 + print('The Monte Carlo simulation has been performed {num} times.'.format(num = i * MC_num)) else: + + # Ensure a safe multiprocessing start method before spawning workers + setup_multiprocessing() + print('Execution using multiple processes, process num = {}'.format(Proc_num)) with mp.get_context("spawn").Pool(Proc_num) as pool: results=[pool.apply_async(self.__Knowledge_G_per_sample, args=(K_model, MC_num, self.virtual_samples, v_sample_mean, v_sample_std, archive_sample_x, archive_sample_y, x_value, fea_num, self.ret_noise)) for x_value, v_sample_mean, v_sample_std in zip(self.virtual_samples, self.virtual_samples_mean, self.virtual_samples_std)] for idx, rst in enumerate(results): MC_batch_min = rst.get() MC_result = MC_batch_min / MC_num KD_list.append( current_min - MC_result) + i += 1 + print('The Monte Carlo simulation has been performed {num} times.'.format(num = i * MC_num)) KD_list = np.array(KD_list) return_x = [] if self.opt_num == 1: KD_opt_index = np.random.choice(np.flatnonzero(KD_list == KD_list.max())) print('The next datum recomended by Knowledge Gradient : \n x = ', self.row_features[KD_opt_index]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[KD_opt_index]) return_x.append(self.row_features[KD_opt_index]) elif type(self.opt_num) == int: KD_opt_index = np.argpartition(KD_list, -self.opt_num)[-self.opt_num:] for j in range(len(KD_opt_index)): print('The {num}-th datum recomended by Knowledge Gradient : \n x = '.format(num =j+1), self.row_features[KD_opt_index[j]]) + print('The predictions of Bgolearn are : \n y = ', self.virtual_samples_mean[KD_opt_index[j]]) return_x.append(self.row_features[KD_opt_index[j]]) else: print('The input para. opt_num must be an int') return KD_list,np.array(return_x) + + +def setup_multiprocessing(): + if multiprocessing.get_start_method() != 'fork': + try: + multiprocessing.set_start_method('fork') + except RuntimeError: + print('\'fork\' method not available, using the default') +# cal norm prob. 
+def norm_des(x): + return 1 / np.sqrt(2 * np.pi) * np.exp(- x ** 2 / 2) \ No newline at end of file diff --git a/Bgolearn/BgolearnFuns/Untitled.ipynb b/Bgolearn/BgolearnFuns/Untitled.ipynb new file mode 100644 index 0000000..0e8c8d9 --- /dev/null +++ b/Bgolearn/BgolearnFuns/Untitled.ipynb @@ -0,0 +1,56 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "id": "daf8e1f7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'__pycache__/BGOmin.cpython-39.pyc'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import py_compile\n", + "\n", + "py_compile.compile('BGOmin.py')\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9ee98373", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Bgolearn/__init__.py b/Bgolearn/__init__.py index bd419b2..93b4000 100755 --- a/Bgolearn/__init__.py +++ b/Bgolearn/__init__.py @@ -4,17 +4,8 @@ __author__ = 'Bin Cao, Advanced Materials Thrust, Hong Kong University of Science and Technology (Guangzhou)' __author_email__ = 'binjacobcao@gmail.com' __url__ = 'https://github.com/Bin-Cao/Bgolearn' +__DOI__ = 'Cao, Bin and Su, Tianhao and Yu, Shuting and Li, Tianyuan and Zhang, Taolue and Dong, Ziqiang and Zhang, Tong-Yi, Active Learning Accelerates the Discovery of High Strength and High Ductility Lead-Free Solder Alloys. 
Available at SSRN: https://ssrn.com/abstract=4686075 or http://dx.doi.org/10.2139/ssrn.4686075' -import datetime -now = datetime.datetime.now() -formatted_date_time = now.strftime('%Y-%m-%d %H:%M:%S') + - - -print('A Bayesian global optimization package') -print('Bgolearn, Bin CAO, HKUST(GZ)') -print('Intro : https://bgolearn.netlify.app/') -print('URL : https://github.com/Bin-Cao/Bgolearn') -print('Executed on :',formatted_date_time, ' | Have a great day.') -print('='*80) diff --git a/Template/Bgolearn/PredictionsByBgolearn_2024_1_7_8_43.csv b/Template/Bgolearn/PredictionsByBgolearn_2024_1_7_8_43.csv new file mode 100644 index 0000000..e2e5706 --- /dev/null +++ b/Template/Bgolearn/PredictionsByBgolearn_2024_1_7_8_43.csv @@ -0,0 +1,112 @@ +,x,Y +0,0.0,6.155134112735885 +1,0.009090909090909092,6.167322011579654 +2,0.018181818181818184,6.159348945279813 +3,0.02727272727272728,6.129979505824761 +4,0.03636363636363637,6.078876678327495 +5,0.045454545454545456,6.006669102966461 +6,0.05454545454545456,5.914952359905194 +7,0.06363636363636364,5.806221491898051 +8,0.07272727272727274,5.683738531632373 +9,0.08181818181818182,5.551345138779945 +10,0.09090909090909091,5.413235805736983 +11,0.1,5.273710806995612 +12,0.10909090909090911,5.136929701193356 +13,0.1181818181818182,5.006685576812045 +14,0.1272727272727273,4.886217486565754 +15,0.13636363636363635,4.778074036909838 +16,0.14545454545454548,4.684035490102882 +17,0.15454545454545457,4.6050957141332205 +18,0.16363636363636364,4.541499608283588 +19,0.17272727272727276,4.492826877388426 +20,0.18181818181818182,4.4581096946860725 +21,0.19090909090909092,4.435970131466804 +22,0.2,4.424763258630415 +23,0.20909090909090913,4.42271334560359 +24,0.21818181818181823,4.428033236310463 +25,0.2272727272727273,4.439020311759614 +26,0.2363636363636364,4.454125968807003 +27,0.24545454545454548,4.471998808633611 +28,0.2545454545454546,4.491504382570917 +29,0.26363636363636367,4.51172615932254 +30,0.2727272727272727,4.531953267380266 +31,0.28181818181818186,4.551660572261887 +32,0.29090909090909095,4.570485921393197 +33,0.30000000000000004,4.588208155756038 +34,0.30909090909090914,4.604728007801087 +35,0.3181818181818182,4.620052536970949 +36,0.32727272727272727,4.634282517603507 +37,0.33636363636363636,4.647601346715705 +38,0.3454545454545455,4.66026366219304 +39,0.3545454545454546,4.67258195655652 +40,0.36363636363636365,4.6849099672774726 +41,0.3727272727272728,4.697622396111218 +42,0.38181818181818183,4.711091399659975 +43,0.39090909090909093,4.725661137571306 +44,0.4,4.741622318109894 +45,0.4090909090909091,4.759189036906273 +46,0.41818181818181827,4.778480209444685 +47,0.4272727272727273,4.7995075544323385 +48,0.43636363636363645,4.822171449804063 +49,0.4454545454545455,4.8462651531633005 +50,0.4545454545454546,4.871486975138818 +51,0.4636363636363637,4.897459142592111 +52,0.4727272727272728,4.9237513989706185 +53,0.4818181818181819,4.949906941376719 +54,0.49090909090909096,4.975468128936716 +55,0.5,4.999999514406058 +56,0.5090909090909091,5.023106113726321 +57,0.5181818181818182,5.044445373097025 +58,0.5272727272727273,5.0637319434235195 +59,0.5363636363636364,5.080735051612946 +60,0.5454545454545454,5.095268903290297 +61,0.5545454545454546,5.107177118584491 +62,0.5636363636363637,5.116312671533183 +63,0.5727272727272729,5.122515175453157 +64,0.5818181818181819,5.125587647033448 +65,0.5909090909090909,5.125275111585327 +66,0.6000000000000001,5.121247595504508 +67,0.6090909090909091,5.113090188198884 +68,0.6181818181818183,5.100302919902715 
+69,0.6272727272727273,5.0823131435026045 +70,0.6363636363636364,5.058502854736269 +71,0.6454545454545455,5.028252849936261 +72,0.6545454545454545,4.991004720621448 +73,0.6636363636363637,4.9463403594587545 +74,0.6727272727272727,4.8940768878045136 +75,0.6818181818181819,4.834372763602107 +76,0.690909090909091,4.76783842543011 +77,0.7000000000000001,4.695642399094378 +78,0.7090909090909092,4.619601644755762 +79,0.7181818181818183,4.54224342124791 +80,0.7272727272727273,4.466825474183289 +81,0.7363636363636363,4.397302263077634 +82,0.7454545454545456,4.338227476898954 +83,0.7545454545454546,4.294587329706257 +84,0.7636363636363637,4.271564943821996 +85,0.7727272727272727,4.2742431354937676 +86,0.7818181818181819,4.307260491090497 +87,0.790909090909091,4.374442932499796 +88,0.8,4.478439071597927 +89,0.8090909090909092,4.620391596882326 +90,0.8181818181818182,4.799677908408766 +91,0.8272727272727273,5.013750683325945 +92,0.8363636363636365,5.258102869221434 +93,0.8454545454545456,5.526372086371494 +94,0.8545454545454546,5.810587366747305 +95,0.8636363636363636,6.101547772055301 +96,0.8727272727272729,6.389309198291636 +97,0.881818181818182,6.663744164539262 +98,0.890909090909091,6.9151310487093465 +99,0.9,7.134725186639851 +100,0.9090909090909092,7.315265100150665 +101,0.9181818181818183,7.451372861597662 +102,0.9272727272727274,7.539817611516662 +103,0.9363636363636365,7.579624353320732 +104,0.9454545454545455,7.572024804389464 +105,0.9545454545454546,7.5202615720923 +106,0.9636363636363638,7.429269606715601 +107,0.9727272727272729,7.305268416600594 +108,0.9818181818181819,7.155304033511567 +109,0.990909090909091,6.986780863457752 +110,1.0,6.807020585635382 diff --git a/Template/regression.ipynb b/Template/regression.ipynb index 2132bda..4121a04 100644 --- a/Template/regression.ipynb +++ b/Template/regression.ipynb @@ -210,7 +210,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -306,7 +306,7 @@ "11 10.0" ] }, - "execution_count": 4, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -317,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -338,7 +338,7 @@ "Name: y, dtype: float64" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -366,7 +366,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -385,7 +385,7 @@ " 11. ])" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -399,7 +399,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -408,7 +408,7 @@ "111" ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -435,7 +435,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -448,16 +448,16 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 9, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" }, @@ -492,7 +492,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -512,7 +512,7 @@ "URL : https://github.com/Bin-Cao/Bgolearn\n", "Citation Format Suggestion:\n", "[Bin CAO et al]. 
\"Active learning accelerates the discovery of high strength and high ductility lead-free solder alloys\", [2023], [DOI : http://dx.doi.org/10.2139/ssrn.4686075].\n", - "Executed on : 2024-01-06 23:55:38 | Have a great day.\n", + "Executed on : 2024-01-07 08:43:07 | Have a great day.\n", "================================================================================\n", "The internal model is instantiated with optimized homogenous noise\n", "current optimal is : 4.4140625\n",