From b6c729a47b50af35ca69decc5cb48f1321626b84 Mon Sep 17 00:00:00 2001 From: Adam Katz Date: Sun, 26 May 2019 12:04:55 +0300 Subject: [PATCH] Changed load_data to check for a local copy of the data; if the file doesn't exist, fetch it from the internet. Fixed column renaming. PEP-8 cleanup. --- adult.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/adult.py b/adult.py index ff89795..899cc0e 100644 --- a/adult.py +++ b/adult.py @@ -78,7 +78,6 @@ def run_a_experiments(): acc, std, time, sparsity, dimensions = cv_binary_classification(model, X, y, continuous, categorical, encoder=BetaEncoder(alpha=alpha_prior, beta=1-alpha_prior), moments='mv') results.append([type(model), 'BetaEncoder (mv)', acc, std, time, sparsity, dimensions]) - file = 'adult_experiments.csv' with open(file, "w") as output: writer = csv.writer(output, lineterminator='\n') @@ -88,8 +87,10 @@ def run_a_experiments(): except: print("File Not Uploaded") -def load_data(local=False): - if not local: + +def load_data(): + exists = os.path.isfile('adult_raw.csv') + if not exists: url="https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data" r=requests.get(url).content df=pd.read_csv(io.StringIO(r.decode('utf-8')),header=None) @@ -113,13 +114,13 @@ def load_data(local=False): 'hours-per-week', 'native-country', 'class'] - new_name = dict(enumerate(names)) - + new_name = {str(k): v for k, v in dict(enumerate(names)).items()} df = df.rename(new_name,axis='columns') df['class'] = (df['class']==' >50K').astype(int) return df + if __name__ == '__main__': run_a_experiments()